xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision e0b7e82fd3cf27fce84cc3e37e8d70a5c36a2d4e)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I  "petscmat.h"  I*/
2 
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
8 {
9   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
10 
11   PetscFunctionBegin;
12   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
13   PetscCall(MatStashDestroy_Private(&mat->stash));
14   PetscCall(MatStashDestroy_Private(&mat->bstash));
15   PetscCall(MatDestroy(&baij->A));
16   PetscCall(MatDestroy(&baij->B));
17 #if defined(PETSC_USE_CTABLE)
18   PetscCall(PetscHMapIDestroy(&baij->colmap));
19 #else
20   PetscCall(PetscFree(baij->colmap));
21 #endif
22   PetscCall(PetscFree(baij->garray));
23   PetscCall(VecDestroy(&baij->lvec));
24   PetscCall(VecScatterDestroy(&baij->Mvctx));
25   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
26   PetscCall(PetscFree(baij->barray));
27   PetscCall(PetscFree2(baij->hd, baij->ht));
28   PetscCall(PetscFree(baij->rangebs));
29   PetscCall(PetscFree(mat->data));
30 
31   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
32   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
33   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
34   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
35   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
36   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
37   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
38   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
39   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
40   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
41 #if defined(PETSC_HAVE_HYPRE)
42   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
43 #endif
44   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
45   PetscFunctionReturn(PETSC_SUCCESS);
46 }
47 
48 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
49 #define TYPE BAIJ
50 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
51 #undef TYPE
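/*
   A template-style include: TYPE parameterizes the shared hash-based
   assembly source in mpihashmat.h so the routines named above operate on
   the BAIJ data structures; the #define/#undef pair keeps the macro from
   leaking into the rest of this file.
*/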
52 
53 #if defined(PETSC_HAVE_HYPRE)
54 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
55 #endif
56 
57 static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[])
58 {
59   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
60   PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
61   PetscScalar       *va, *vv;
62   Vec                vB, vA;
63   const PetscScalar *vb;
64 
65   PetscFunctionBegin;
66   PetscCall(MatCreateVecs(a->A, NULL, &vA));
67   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
68 
69   PetscCall(VecGetArrayWrite(vA, &va));
70   if (idx) {
71     for (i = 0; i < m; i++) {
72       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
73     }
74   }
75 
76   PetscCall(MatCreateVecs(a->B, NULL, &vB));
77   PetscCall(PetscMalloc1(m, &idxb));
78   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
79 
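  /* Merge the per-row maxima of the diagonal (A) and off-diagonal (B) blocks.
     Column indices returned for B are local to its compact column space, so a
     local point index p maps to the global column bs * garray[p / bs] + (p % bs).
     Hypothetical example: with bs = 2 and garray = {5, 9}, p = 3 maps to
     2 * garray[1] + 1 = 19.  On a tie in magnitude the loop below keeps the
     smaller global column index. */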
80   PetscCall(VecGetArrayWrite(v, &vv));
81   PetscCall(VecGetArrayRead(vB, &vb));
82   for (i = 0; i < m; i++) {
83     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
84       vv[i] = vb[i];
85       if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
86     } else {
87       vv[i] = va[i];
88       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
89     }
90   }
91   PetscCall(VecRestoreArrayWrite(v, &vv));
92   PetscCall(VecRestoreArrayWrite(vA, &va));
93   PetscCall(VecRestoreArrayRead(vB, &vb));
94   PetscCall(PetscFree(idxb));
95   PetscCall(VecDestroy(&vA));
96   PetscCall(VecDestroy(&vB));
97   PetscFunctionReturn(PETSC_SUCCESS);
98 }
99 
100 static PetscErrorCode MatGetRowSumAbs_MPIBAIJ(Mat A, Vec v)
101 {
102   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
103   Vec          vB, vA;
104 
105   PetscFunctionBegin;
106   PetscCall(MatCreateVecs(a->A, NULL, &vA));
107   PetscCall(MatGetRowSumAbs(a->A, vA));
108   PetscCall(MatCreateVecs(a->B, NULL, &vB));
109   PetscCall(MatGetRowSumAbs(a->B, vB));
110   PetscCall(VecAXPY(vA, 1.0, vB));
111   PetscCall(VecDestroy(&vB));
112   PetscCall(VecCopy(vA, v));
113   PetscCall(VecDestroy(&vA));
114   PetscFunctionReturn(PETSC_SUCCESS);
115 }
116 
117 static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
118 {
119   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
120 
121   PetscFunctionBegin;
122   PetscCall(MatStoreValues(aij->A));
123   PetscCall(MatStoreValues(aij->B));
124   PetscFunctionReturn(PETSC_SUCCESS);
125 }
126 
127 static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
128 {
129   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
130 
131   PetscFunctionBegin;
132   PetscCall(MatRetrieveValues(aij->A));
133   PetscCall(MatRetrieveValues(aij->B));
134   PetscFunctionReturn(PETSC_SUCCESS);
135 }
136 
137 /*
138      Local utility routine that creates a mapping from the global column
139    number to the local number in the off-diagonal part of the local
140    storage of the matrix.  This is done in a non-scalable way since the
141    length of colmap equals the global matrix length.
142 */
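/*
   A small worked example with hypothetical values: suppose bs = 2 and the
   off-diagonal part references global block columns garray = {3, 7}.  The
   non-CTABLE branch below sets colmap[3] = 1 and colmap[7] = 3 (i * bs + 1
   for i = 0, 1), while every other entry stays 0, encoding "block column not
   present".  Lookups therefore subtract 1 to recover the local point-column
   offset; the CTABLE branch stores the same values in a hash map keyed by
   garray[i] + 1, trading O(1) array indexing for memory scalability.
*/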
143 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
144 {
145   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
146   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
147   PetscInt     nbs = B->nbs, i, bs = mat->rmap->bs;
148 
149   PetscFunctionBegin;
150 #if defined(PETSC_USE_CTABLE)
151   PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap));
152   for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1));
153 #else
154   PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
155   for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
156 #endif
157   PetscFunctionReturn(PETSC_SUCCESS);
158 }
159 
160 #define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
161   do { \
162     brow = row / bs; \
163     rp   = PetscSafePointerPlusOffset(aj, ai[brow]); \
164     ap   = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \
165     rmax = aimax[brow]; \
166     nrow = ailen[brow]; \
167     bcol = col / bs; \
168     ridx = row % bs; \
169     cidx = col % bs; \
170     low  = 0; \
171     high = nrow; \
172     while (high - low > 3) { \
173       t = (low + high) / 2; \
174       if (rp[t] > bcol) high = t; \
175       else low = t; \
176     } \
177     for (_i = low; _i < high; _i++) { \
178       if (rp[_i] > bcol) break; \
179       if (rp[_i] == bcol) { \
180         bap = ap + bs2 * _i + bs * cidx + ridx; \
181         if (addv == ADD_VALUES) *bap += value; \
182         else *bap = value; \
183         goto a_noinsert; \
184       } \
185     } \
186     if (a->nonew == 1) goto a_noinsert; \
187     PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
188     MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
189     N = nrow++ - 1; \
190     /* shift up all the later entries in this row */ \
191     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
192     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
193     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
194     rp[_i]                          = bcol; \
195     ap[bs2 * _i + bs * cidx + ridx] = value; \
196   a_noinsert:; \
197     ailen[brow] = nrow; \
198   } while (0)
199 
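/*
   MatSetValues_SeqBAIJ_A_Private() above and MatSetValues_SeqBAIJ_B_Private()
   below share the same logic: a short binary search narrows the bracket to at
   most three candidates, a linear scan finds (or inserts) the bs x bs block
   for block column bcol, and the scalar lands at offset
   bs2 * _i + bs * cidx + ridx, i.e. blocks are stored column-major.  For
   example, with bs = 2 the entry (ridx, cidx) = (1, 0) of block _i sits at
   ap[4 * _i + 1].
*/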
200 #define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
201   do { \
202     brow = row / bs; \
203     rp   = PetscSafePointerPlusOffset(bj, bi[brow]); \
204     ap   = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \
205     rmax = bimax[brow]; \
206     nrow = bilen[brow]; \
207     bcol = col / bs; \
208     ridx = row % bs; \
209     cidx = col % bs; \
210     low  = 0; \
211     high = nrow; \
212     while (high - low > 3) { \
213       t = (low + high) / 2; \
214       if (rp[t] > bcol) high = t; \
215       else low = t; \
216     } \
217     for (_i = low; _i < high; _i++) { \
218       if (rp[_i] > bcol) break; \
219       if (rp[_i] == bcol) { \
220         bap = ap + bs2 * _i + bs * cidx + ridx; \
221         if (addv == ADD_VALUES) *bap += value; \
222         else *bap = value; \
223         goto b_noinsert; \
224       } \
225     } \
226     if (b->nonew == 1) goto b_noinsert; \
227     PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
228     MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
229     N = nrow++ - 1; \
230     /* shift up all the later entries in this row */ \
231     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
232     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
233     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
234     rp[_i]                          = bcol; \
235     ap[bs2 * _i + bs * cidx + ridx] = value; \
236   b_noinsert:; \
237     bilen[brow] = nrow; \
238   } while (0)
239 
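/*
   Point-indexed MatSetValues(): entries in locally owned rows go to either
   the diagonal block baij->A or the off-diagonal block baij->B (translating
   the global column through colmap, and disassembling the matrix when a
   brand-new off-process column appears after assembly); entries in rows
   owned elsewhere are stashed and communicated during
   MatAssemblyBegin/End_MPIBAIJ().
*/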
240 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
241 {
242   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
243   MatScalar    value;
244   PetscBool    roworiented = baij->roworiented;
245   PetscInt     i, j, row, col;
246   PetscInt     rstart_orig = mat->rmap->rstart;
247   PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
248   PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;
249 
250   /* Some Variables required in the macro */
251   Mat          A     = baij->A;
252   Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)(A)->data;
253   PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
254   MatScalar   *aa = a->a;
255 
256   Mat          B     = baij->B;
257   Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
258   PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
259   MatScalar   *ba = b->a;
260 
261   PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
262   PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
263   MatScalar *ap, *bap;
264 
265   PetscFunctionBegin;
266   for (i = 0; i < m; i++) {
267     if (im[i] < 0) continue;
268     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
269     if (im[i] >= rstart_orig && im[i] < rend_orig) {
270       row = im[i] - rstart_orig;
271       for (j = 0; j < n; j++) {
272         if (in[j] >= cstart_orig && in[j] < cend_orig) {
273           col = in[j] - cstart_orig;
274           if (roworiented) value = v[i * n + j];
275           else value = v[i + j * m];
276           MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
277         } else if (in[j] < 0) {
278           continue;
279         } else {
280           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
281           if (mat->was_assembled) {
282             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
283 #if defined(PETSC_USE_CTABLE)
284             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
285             col = col - 1;
286 #else
287             col = baij->colmap[in[j] / bs] - 1;
288 #endif
289             if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
290               PetscCall(MatDisAssemble_MPIBAIJ(mat));
291               col = in[j];
292               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
293               B     = baij->B;
294               b     = (Mat_SeqBAIJ *)(B)->data;
295               bimax = b->imax;
296               bi    = b->i;
297               bilen = b->ilen;
298               bj    = b->j;
299               ba    = b->a;
300             } else {
301               PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
302               col += in[j] % bs;
303             }
304           } else col = in[j];
305           if (roworiented) value = v[i * n + j];
306           else value = v[i + j * m];
307           MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
308           /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
309         }
310       }
311     } else {
312       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
313       if (!baij->donotstash) {
314         mat->assembled = PETSC_FALSE;
315         if (roworiented) {
316           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
317         } else {
318           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
319         }
320       }
321     }
322   }
323   PetscFunctionReturn(PETSC_SUCCESS);
324 }
325 
326 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
327 {
328   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
329   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
330   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
331   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
332   PetscBool          roworiented = a->roworiented;
333   const PetscScalar *value       = v;
334   MatScalar         *ap, *aa = a->a, *bap;
335 
336   PetscFunctionBegin;
337   rp    = aj + ai[row];
338   ap    = aa + bs2 * ai[row];
339   rmax  = imax[row];
340   nrow  = ailen[row];
341   value = v;
342   low   = 0;
343   high  = nrow;
344   while (high - low > 7) {
345     t = (low + high) / 2;
346     if (rp[t] > col) high = t;
347     else low = t;
348   }
349   for (i = low; i < high; i++) {
350     if (rp[i] > col) break;
351     if (rp[i] == col) {
352       bap = ap + bs2 * i;
353       if (roworiented) {
354         if (is == ADD_VALUES) {
355           for (ii = 0; ii < bs; ii++) {
356             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
357           }
358         } else {
359           for (ii = 0; ii < bs; ii++) {
360             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
361           }
362         }
363       } else {
364         if (is == ADD_VALUES) {
365           for (ii = 0; ii < bs; ii++, value += bs) {
366             for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
367             bap += bs;
368           }
369         } else {
370           for (ii = 0; ii < bs; ii++, value += bs) {
371             for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
372             bap += bs;
373           }
374         }
375       }
376       goto noinsert2;
377     }
378   }
379   if (nonew == 1) goto noinsert2;
380   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
381   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
382   N = nrow++ - 1;
383   high++;
384   /* shift up all the later entries in this row */
385   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
386   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
387   rp[i] = col;
388   bap   = ap + bs2 * i;
389   if (roworiented) {
390     for (ii = 0; ii < bs; ii++) {
391       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
392     }
393   } else {
394     for (ii = 0; ii < bs; ii++) {
395       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
396     }
397   }
398 noinsert2:;
399   ailen[row] = nrow;
400   PetscFunctionReturn(PETSC_SUCCESS);
401 }
402 
403 /*
404     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
405     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
406 */
407 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
408 {
409   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ *)mat->data;
410   const PetscScalar *value;
411   MatScalar         *barray      = baij->barray;
412   PetscBool          roworiented = baij->roworiented;
413   PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
414   PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
415   PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
416 
417   PetscFunctionBegin;
418   if (!barray) {
419     PetscCall(PetscMalloc1(bs2, &barray));
420     baij->barray = barray;
421   }
422 
423   if (roworiented) stepval = (n - 1) * bs;
424   else stepval = (m - 1) * bs;
425 
426   for (i = 0; i < m; i++) {
427     if (im[i] < 0) continue;
428     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
429     if (im[i] >= rstart && im[i] < rend) {
430       row = im[i] - rstart;
431       for (j = 0; j < n; j++) {
432         /* If a single block is being inserted (n == 1 row-oriented, or m == 1 column-oriented) then a copy is not required */
433         if ((roworiented) && (n == 1)) {
434           barray = (MatScalar *)v + i * bs2;
435         } else if ((!roworiented) && (m == 1)) {
436           barray = (MatScalar *)v + j * bs2;
437         } else { /* Here a copy is required */
438           if (roworiented) {
439             value = v + (i * (stepval + bs) + j) * bs;
440           } else {
441             value = v + (j * (stepval + bs) + i) * bs;
442           }
443           for (ii = 0; ii < bs; ii++, value += bs + stepval) {
444             for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
445             barray += bs;
446           }
447           barray -= bs2;
448         }
449 
450         if (in[j] >= cstart && in[j] < cend) {
451           col = in[j] - cstart;
452           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
453         } else if (in[j] < 0) {
454           continue;
455         } else {
456           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
457           if (mat->was_assembled) {
458             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
459 
460 #if defined(PETSC_USE_DEBUG)
461   #if defined(PETSC_USE_CTABLE)
462             {
463               PetscInt data;
464               PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
465               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
466             }
467   #else
468             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
469   #endif
470 #endif
471 #if defined(PETSC_USE_CTABLE)
472             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
473             col = (col - 1) / bs;
474 #else
475             col = (baij->colmap[in[j]] - 1) / bs;
476 #endif
477             if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
478               PetscCall(MatDisAssemble_MPIBAIJ(mat));
479               col = in[j];
480             } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
481           } else col = in[j];
482           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
483         }
484       }
485     } else {
486       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
487       if (!baij->donotstash) {
488         if (roworiented) {
489           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
490         } else {
491           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
492         }
493       }
494     }
495   }
496   PetscFunctionReturn(PETSC_SUCCESS);
497 }
498 
499 #define HASH_KEY             0.6180339887
500 #define HASH(size, key, tmp) (tmp = (key) * HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
501 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
502 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
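/*
   HASH() is Fibonacci (multiplicative) hashing: the key is scaled by the
   golden-ratio constant HASH_KEY and the fractional part of the product is
   mapped onto [0, size).  A worked example with hypothetical inputs:
   key = 10 and size = 100 give tmp = 6.180..., fractional part 0.180...,
   hence slot 18.
*/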
503 static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
504 {
505   Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
506   PetscBool    roworiented = baij->roworiented;
507   PetscInt     i, j, row, col;
508   PetscInt     rstart_orig = mat->rmap->rstart;
509   PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
510   PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
511   PetscReal    tmp;
512   MatScalar  **HD       = baij->hd, value;
513   PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;
514 
515   PetscFunctionBegin;
516   for (i = 0; i < m; i++) {
517     if (PetscDefined(USE_DEBUG)) {
518       PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
519       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
520     }
521     row = im[i];
522     if (row >= rstart_orig && row < rend_orig) {
523       for (j = 0; j < n; j++) {
524         col = in[j];
525         if (roworiented) value = v[i * n + j];
526         else value = v[i + j * m];
527         /* Look up into the Hash Table */
528         key = (row / bs) * Nbs + (col / bs) + 1;
529         h1  = HASH(size, key, tmp);
530 
531         idx = h1;
532         if (PetscDefined(USE_DEBUG)) {
533           insert_ct++;
534           total_ct++;
535           if (HT[idx] != key) {
536             for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
537             if (idx == size) {
538               for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
539               PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
540             }
541           }
542         } else if (HT[idx] != key) {
543           for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
544           if (idx == size) {
545             for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
546             PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
547           }
548         }
549         /* A HASH table entry is found, so insert the values at the correct address */
550         if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
551         else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
552       }
553     } else if (!baij->donotstash) {
554       if (roworiented) {
555         PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
556       } else {
557         PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
558       }
559     }
560   }
561   if (PetscDefined(USE_DEBUG)) {
562     baij->ht_total_ct += total_ct;
563     baij->ht_insert_ct += insert_ct;
564   }
565   PetscFunctionReturn(PETSC_SUCCESS);
566 }
567 
568 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
569 {
570   Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
571   PetscBool          roworiented = baij->roworiented;
572   PetscInt           i, j, ii, jj, row, col;
573   PetscInt           rstart = baij->rstartbs;
574   PetscInt           rend = baij->rendbs, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
575   PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
576   PetscReal          tmp;
577   MatScalar        **HD = baij->hd, *baij_a;
578   const PetscScalar *v_t, *value;
579   PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;
580 
581   PetscFunctionBegin;
582   if (roworiented) stepval = (n - 1) * bs;
583   else stepval = (m - 1) * bs;
584 
585   for (i = 0; i < m; i++) {
586     if (PetscDefined(USE_DEBUG)) {
587       PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
588       PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
589     }
590     row = im[i];
591     v_t = v + i * nbs2;
592     if (row >= rstart && row < rend) {
593       for (j = 0; j < n; j++) {
594         col = in[j];
595 
596         /* Look up into the Hash Table */
597         key = row * Nbs + col + 1;
598         h1  = HASH(size, key, tmp);
599 
600         idx = h1;
601         if (PetscDefined(USE_DEBUG)) {
602           total_ct++;
603           insert_ct++;
604           if (HT[idx] != key) {
605             for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
606             if (idx == size) {
607               for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
608               PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
609             }
610           }
611         } else if (HT[idx] != key) {
612           for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
613           if (idx == size) {
614             for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
615             PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
616           }
617         }
618         baij_a = HD[idx];
619         if (roworiented) {
620           /*value = v + i*(stepval+bs)*bs + j*bs;*/
621           /* value = v + (i*(stepval+bs)+j)*bs; */
622           value = v_t;
623           v_t += bs;
624           if (addv == ADD_VALUES) {
625             for (ii = 0; ii < bs; ii++, value += stepval) {
626               for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
627             }
628           } else {
629             for (ii = 0; ii < bs; ii++, value += stepval) {
630               for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
631             }
632           }
633         } else {
634           value = v + j * (stepval + bs) * bs + i * bs;
635           if (addv == ADD_VALUES) {
636             for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
637               for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
638             }
639           } else {
640             for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
641               for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
642             }
643           }
644         }
645       }
646     } else {
647       if (!baij->donotstash) {
648         if (roworiented) {
649           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
650         } else {
651           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
652         }
653       }
654     }
655   }
656   if (PetscDefined(USE_DEBUG)) {
657     baij->ht_total_ct += total_ct;
658     baij->ht_insert_ct += insert_ct;
659   }
660   PetscFunctionReturn(PETSC_SUCCESS);
661 }
662 
663 static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
664 {
665   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
666   PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
667   PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;
668 
669   PetscFunctionBegin;
670   for (i = 0; i < m; i++) {
671     if (idxm[i] < 0) continue; /* negative row */
672     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
673     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
674       row = idxm[i] - bsrstart;
675       for (j = 0; j < n; j++) {
676         if (idxn[j] < 0) continue; /* negative column */
677         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
678         if (idxn[j] >= bscstart && idxn[j] < bscend) {
679           col = idxn[j] - bscstart;
680           PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
681         } else {
682           if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
683 #if defined(PETSC_USE_CTABLE)
684           PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
685           data--;
686 #else
687           data = baij->colmap[idxn[j] / bs] - 1;
688 #endif
689           if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
690           else {
691             col = data + idxn[j] % bs;
692             PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
693           }
694         }
695       }
696     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
697   }
698   PetscFunctionReturn(PETSC_SUCCESS);
699 }
700 
701 static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
702 {
703   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
704   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
705   PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
706   PetscReal    sum = 0.0;
707   MatScalar   *v;
708 
709   PetscFunctionBegin;
710   if (baij->size == 1) {
711     PetscCall(MatNorm(baij->A, type, nrm));
712   } else {
713     if (type == NORM_FROBENIUS) {
714       v  = amat->a;
715       nz = amat->nz * bs2;
716       for (i = 0; i < nz; i++) {
717         sum += PetscRealPart(PetscConj(*v) * (*v));
718         v++;
719       }
720       v  = bmat->a;
721       nz = bmat->nz * bs2;
722       for (i = 0; i < nz; i++) {
723         sum += PetscRealPart(PetscConj(*v) * (*v));
724         v++;
725       }
726       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
727       *nrm = PetscSqrtReal(*nrm);
728     } else if (type == NORM_1) { /* max column sum */
729       PetscReal *tmp, *tmp2;
730       PetscInt  *jj, *garray = baij->garray, cstart = baij->rstartbs;
731       PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
732       PetscCall(PetscMalloc1(mat->cmap->N, &tmp2));
733       v  = amat->a;
734       jj = amat->j;
735       for (i = 0; i < amat->nz; i++) {
736         for (j = 0; j < bs; j++) {
737           col = bs * (cstart + *jj) + j; /* column index */
738           for (row = 0; row < bs; row++) {
739             tmp[col] += PetscAbsScalar(*v);
740             v++;
741           }
742         }
743         jj++;
744       }
745       v  = bmat->a;
746       jj = bmat->j;
747       for (i = 0; i < bmat->nz; i++) {
748         for (j = 0; j < bs; j++) {
749           col = bs * garray[*jj] + j;
750           for (row = 0; row < bs; row++) {
751             tmp[col] += PetscAbsScalar(*v);
752             v++;
753           }
754         }
755         jj++;
756       }
757       PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
758       *nrm = 0.0;
759       for (j = 0; j < mat->cmap->N; j++) {
760         if (tmp2[j] > *nrm) *nrm = tmp2[j];
761       }
762       PetscCall(PetscFree(tmp));
763       PetscCall(PetscFree(tmp2));
764     } else if (type == NORM_INFINITY) { /* max row sum */
765       PetscReal *sums;
766       PetscCall(PetscMalloc1(bs, &sums));
767       sum = 0.0;
768       for (j = 0; j < amat->mbs; j++) {
769         for (row = 0; row < bs; row++) sums[row] = 0.0;
770         v  = amat->a + bs2 * amat->i[j];
771         nz = amat->i[j + 1] - amat->i[j];
772         for (i = 0; i < nz; i++) {
773           for (col = 0; col < bs; col++) {
774             for (row = 0; row < bs; row++) {
775               sums[row] += PetscAbsScalar(*v);
776               v++;
777             }
778           }
779         }
780         v  = bmat->a + bs2 * bmat->i[j];
781         nz = bmat->i[j + 1] - bmat->i[j];
782         for (i = 0; i < nz; i++) {
783           for (col = 0; col < bs; col++) {
784             for (row = 0; row < bs; row++) {
785               sums[row] += PetscAbsScalar(*v);
786               v++;
787             }
788           }
789         }
790         for (row = 0; row < bs; row++) {
791           if (sums[row] > sum) sum = sums[row];
792         }
793       }
794       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
795       PetscCall(PetscFree(sums));
796     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
797   }
798   PetscFunctionReturn(PETSC_SUCCESS);
799 }
800 
801 /*
802   Creates and populates the hash table.
803   This table is created only once.
804   If new entries need to be added to the matrix,
805   then the hash table has to be destroyed and
806   recreated.
807 */
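/*
   Collision resolution is linear probing: block (row, col) gets the key
   row * Nbs + col + 1 (the +1 keeps 0 free to mean "empty slot"), and
   insertion scans slots (h1 + k) % ht_size for k = 0, 1, ... until a free
   slot is found; the lookups in MatSetValues_MPIBAIJ_HT() retrace the same
   sequence.  Since ht_size = factor * nz, factor acts as the inverse load
   factor and controls the expected probe length.
*/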
808 static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
809 {
810   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
811   Mat          A = baij->A, B = baij->B;
812   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
813   PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
814   PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
815   PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
816   PetscInt    *HT, key;
817   MatScalar  **HD;
818   PetscReal    tmp;
819 #if defined(PETSC_USE_INFO)
820   PetscInt ct = 0, max = 0;
821 #endif
822 
823   PetscFunctionBegin;
824   if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS);
825 
826   baij->ht_size = (PetscInt)(factor * nz);
827   ht_size       = baij->ht_size;
828 
829   /* Allocate Memory for Hash Table */
830   PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
831   HD = baij->hd;
832   HT = baij->ht;
833 
834   /* Loop Over A */
835   for (i = 0; i < a->mbs; i++) {
836     for (j = ai[i]; j < ai[i + 1]; j++) {
837       row = i + rstart;
838       col = aj[j] + cstart;
839 
840       key = row * Nbs + col + 1;
841       h1  = HASH(ht_size, key, tmp);
842       for (k = 0; k < ht_size; k++) {
843         if (!HT[(h1 + k) % ht_size]) {
844           HT[(h1 + k) % ht_size] = key;
845           HD[(h1 + k) % ht_size] = a->a + j * bs2;
846           break;
847 #if defined(PETSC_USE_INFO)
848         } else {
849           ct++;
850 #endif
851         }
852       }
853 #if defined(PETSC_USE_INFO)
854       if (k > max) max = k;
855 #endif
856     }
857   }
858   /* Loop Over B */
859   for (i = 0; i < b->mbs; i++) {
860     for (j = bi[i]; j < bi[i + 1]; j++) {
861       row = i + rstart;
862       col = garray[bj[j]];
863       key = row * Nbs + col + 1;
864       h1  = HASH(ht_size, key, tmp);
865       for (k = 0; k < ht_size; k++) {
866         if (!HT[(h1 + k) % ht_size]) {
867           HT[(h1 + k) % ht_size] = key;
868           HD[(h1 + k) % ht_size] = b->a + j * bs2;
869           break;
870 #if defined(PETSC_USE_INFO)
871         } else {
872           ct++;
873 #endif
874         }
875       }
876 #if defined(PETSC_USE_INFO)
877       if (k > max) max = k;
878 #endif
879     }
880   }
881 
882   /* Print Summary */
883 #if defined(PETSC_USE_INFO)
884   for (i = 0, j = 0; i < ht_size; i++) {
885     if (HT[i]) j++;
886   }
887   PetscCall(PetscInfo(mat, "Average Search = %5.2g, max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max));
888 #endif
889   PetscFunctionReturn(PETSC_SUCCESS);
890 }
891 
892 static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
893 {
894   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
895   PetscInt     nstash, reallocs;
896 
897   PetscFunctionBegin;
898   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
899 
900   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
901   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
902   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
903   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
904   PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
905   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
906   PetscFunctionReturn(PETSC_SUCCESS);
907 }
908 
909 static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
910 {
911   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
912   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
913   PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
914   PetscInt    *row, *col;
915   PetscBool    r1, r2, r3, other_disassembled;
916   MatScalar   *val;
917   PetscMPIInt  n;
918 
919   PetscFunctionBegin;
920   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
921   if (!baij->donotstash && !mat->nooffprocentries) {
922     while (1) {
923       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
924       if (!flg) break;
925 
926       for (i = 0; i < n;) {
927         /* Now identify the consecutive vals belonging to the same row */
928         for (j = i, rstart = row[j]; j < n; j++) {
929           if (row[j] != rstart) break;
930         }
931         if (j < n) ncols = j - i;
932         else ncols = n - i;
933         /* Now assemble all these values with a single function call */
934         PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
935         i = j;
936       }
937     }
938     PetscCall(MatStashScatterEnd_Private(&mat->stash));
939     /* Now process the block-stash. Since the values are stashed column-oriented,
940        temporarily clear the row-oriented flags, and restore the original
941        flags after MatSetValues() */
942     r1 = baij->roworiented;
943     r2 = a->roworiented;
944     r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;
945 
946     baij->roworiented                           = PETSC_FALSE;
947     a->roworiented                              = PETSC_FALSE;
948     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE;
949     while (1) {
950       PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
951       if (!flg) break;
952 
953       for (i = 0; i < n;) {
954         /* Now identify the consecutive vals belonging to the same row */
955         for (j = i, rstart = row[j]; j < n; j++) {
956           if (row[j] != rstart) break;
957         }
958         if (j < n) ncols = j - i;
959         else ncols = n - i;
960         PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
961         i = j;
962       }
963     }
964     PetscCall(MatStashScatterEnd_Private(&mat->bstash));
965 
966     baij->roworiented                           = r1;
967     a->roworiented                              = r2;
968     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3;
969   }
970 
971   PetscCall(MatAssemblyBegin(baij->A, mode));
972   PetscCall(MatAssemblyEnd(baij->A, mode));
973 
974   /* determine if any processor has disassembled, if so we must
975      also disassemble ourselves, in order that we may reassemble. */
976   /*
977      if nonzero structure of submatrix B cannot change then we know that
978      no processor disassembled thus we can skip this stuff
979   */
980   if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
981     PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
982     if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
983   }
984 
985   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
986   PetscCall(MatAssemblyBegin(baij->B, mode));
987   PetscCall(MatAssemblyEnd(baij->B, mode));
988 
989 #if defined(PETSC_USE_INFO)
990   if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
991     PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));
992 
993     baij->ht_total_ct  = 0;
994     baij->ht_insert_ct = 0;
995   }
996 #endif
997   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
998     PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));
999 
1000     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
1001     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
1002   }
1003 
1004   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
1005 
1006   baij->rowvalues = NULL;
1007 
1008   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
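  /* the sum of the two local nonzero states, reduced over all ranks, yields a
     global state that changes whenever any process changes its nonzero
     structure, so callers can detect structural changes collectively */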
1009   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
1010     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
1011     PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
1012   }
1013   PetscFunctionReturn(PETSC_SUCCESS);
1014 }
1015 
1016 extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer);
1017 #include <petscdraw.h>
1018 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1019 {
1020   Mat_MPIBAIJ      *baij = (Mat_MPIBAIJ *)mat->data;
1021   PetscMPIInt       rank = baij->rank;
1022   PetscInt          bs   = mat->rmap->bs;
1023   PetscBool         iascii, isdraw;
1024   PetscViewer       sviewer;
1025   PetscViewerFormat format;
1026 
1027   PetscFunctionBegin;
1028   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1029   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1030   if (iascii) {
1031     PetscCall(PetscViewerGetFormat(viewer, &format));
1032     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1033       MatInfo info;
1034       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1035       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
1036       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1037       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1038                                                    mat->rmap->bs, (double)info.memory));
1039       PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
1040       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1041       PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
1042       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1043       PetscCall(PetscViewerFlush(viewer));
1044       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1045       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
1046       PetscCall(VecScatterView(baij->Mvctx, viewer));
1047       PetscFunctionReturn(PETSC_SUCCESS);
1048     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1049       PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
1050       PetscFunctionReturn(PETSC_SUCCESS);
1051     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1052       PetscFunctionReturn(PETSC_SUCCESS);
1053     }
1054   }
1055 
1056   if (isdraw) {
1057     PetscDraw draw;
1058     PetscBool isnull;
1059     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1060     PetscCall(PetscDrawIsNull(draw, &isnull));
1061     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1062   }
1063 
1064   {
1065     /* assemble the entire matrix onto first processor. */
1066     Mat          A;
1067     Mat_SeqBAIJ *Aloc;
1068     PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
1069     MatScalar   *a;
1070     const char  *matname;
1071 
1072     /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
1073     /* Perhaps this should be the type of mat? */
1074     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
1075     if (rank == 0) {
1076       PetscCall(MatSetSizes(A, M, N, M, N));
1077     } else {
1078       PetscCall(MatSetSizes(A, 0, 0, M, N));
1079     }
1080     PetscCall(MatSetType(A, MATMPIBAIJ));
1081     PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
1082     PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
1083 
1084     /* copy over the A part */
1085     Aloc = (Mat_SeqBAIJ *)baij->A->data;
1086     ai   = Aloc->i;
1087     aj   = Aloc->j;
1088     a    = Aloc->a;
1089     PetscCall(PetscMalloc1(bs, &rvals));
1090 
1091     for (i = 0; i < mbs; i++) {
1092       rvals[0] = bs * (baij->rstartbs + i);
1093       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1094       for (j = ai[i]; j < ai[i + 1]; j++) {
1095         col = (baij->cstartbs + aj[j]) * bs;
1096         for (k = 0; k < bs; k++) {
1097           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
1098           col++;
1099           a += bs;
1100         }
1101       }
1102     }
1103     /* copy over the B part */
1104     Aloc = (Mat_SeqBAIJ *)baij->B->data;
1105     ai   = Aloc->i;
1106     aj   = Aloc->j;
1107     a    = Aloc->a;
1108     for (i = 0; i < mbs; i++) {
1109       rvals[0] = bs * (baij->rstartbs + i);
1110       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1111       for (j = ai[i]; j < ai[i + 1]; j++) {
1112         col = baij->garray[aj[j]] * bs;
1113         for (k = 0; k < bs; k++) {
1114           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
1115           col++;
1116           a += bs;
1117         }
1118       }
1119     }
1120     PetscCall(PetscFree(rvals));
1121     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1122     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1123     /*
1124        Everyone has to participate in the call that draws the matrix, since the graphics waits are
1125        synchronized across all processors that share the PetscDraw object
1126     */
1127     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1128     if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
1129     if (rank == 0) {
1130       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)A->data)->A, matname));
1131       PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)A->data)->A, sviewer));
1132     }
1133     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1134     PetscCall(MatDestroy(&A));
1135   }
1136   PetscFunctionReturn(PETSC_SUCCESS);
1137 }
1138 
1139 /* Used for both MPIBAIJ and MPISBAIJ matrices */
1140 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
1141 {
1142   Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
1143   Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
1144   Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
1145   const PetscInt *garray = aij->garray;
1146   PetscInt        header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l;
1147   PetscInt64      nz, hnz;
1148   PetscInt       *rowlens, *colidxs;
1149   PetscScalar    *matvals;
1150   PetscMPIInt     rank;
1151 
1152   PetscFunctionBegin;
1153   PetscCall(PetscViewerSetUp(viewer));
1154 
1155   M  = mat->rmap->N;
1156   N  = mat->cmap->N;
1157   m  = mat->rmap->n;
1158   rs = mat->rmap->rstart;
1159   cs = mat->cmap->rstart;
1160   bs = mat->rmap->bs;
1161   nz = bs * bs * (A->nz + B->nz);
1162 
1163   /* write matrix header */
1164   header[0] = MAT_FILE_CLASSID;
1165   header[1] = M;
1166   header[2] = N;
1167   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1168   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1169   if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
1170   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1171 
1172   /* fill in and store row lengths */
1173   PetscCall(PetscMalloc1(m, &rowlens));
1174   for (cnt = 0, i = 0; i < A->mbs; i++)
1175     for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
1176   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1177   PetscCall(PetscFree(rowlens));
1178 
1179   /* fill in and store column indices */
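  /* Indices are emitted in ascending global order for each point row:
     off-diagonal blocks left of this rank's diagonal range first (garray[]
     entries below cs / bs), then the diagonal-block columns shifted by cs,
     then the remaining off-diagonal blocks.  The value loop further below
     repeats the same traversal so colidxs[] and matvals[] stay aligned. */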
1180   PetscCall(PetscMalloc1(nz, &colidxs));
1181   for (cnt = 0, i = 0; i < A->mbs; i++) {
1182     for (k = 0; k < bs; k++) {
1183       for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1184         if (garray[B->j[jb]] > cs / bs) break;
1185         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
1186       }
1187       for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
1188         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
1189       for (; jb < B->i[i + 1]; jb++)
1190         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
1191     }
1192   }
1193   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1194   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
1195   PetscCall(PetscFree(colidxs));
1196 
1197   /* fill in and store nonzero values */
1198   PetscCall(PetscMalloc1(nz, &matvals));
1199   for (cnt = 0, i = 0; i < A->mbs; i++) {
1200     for (k = 0; k < bs; k++) {
1201       for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1202         if (garray[B->j[jb]] > cs / bs) break;
1203         for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
1204       }
1205       for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
1206         for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
1207       for (; jb < B->i[i + 1]; jb++)
1208         for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
1209     }
1210   }
1211   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
1212   PetscCall(PetscFree(matvals));
1213 
1214   /* write block size option to the viewer's .info file */
1215   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1216   PetscFunctionReturn(PETSC_SUCCESS);
1217 }
1218 
1219 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
1220 {
1221   PetscBool iascii, isdraw, issocket, isbinary;
1222 
1223   PetscFunctionBegin;
1224   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1225   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1226   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1227   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1228   if (iascii || isdraw || issocket) {
1229     PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
1230   } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
1231   PetscFunctionReturn(PETSC_SUCCESS);
1232 }
1233 
1234 static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
1235 {
1236   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1237   PetscInt     nt;
1238 
1239   PetscFunctionBegin;
1240   PetscCall(VecGetLocalSize(xx, &nt));
1241   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
1242   PetscCall(VecGetLocalSize(yy, &nt));
1243   PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy");
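  /* overlap communication with computation: gather the needed off-process
     entries of xx into a->lvec while the diagonal block is applied, then
     accumulate the off-diagonal contribution */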
1244   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1245   PetscCall((*a->A->ops->mult)(a->A, xx, yy));
1246   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1247   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
1248   PetscFunctionReturn(PETSC_SUCCESS);
1249 }
1250 
1251 static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1252 {
1253   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1254 
1255   PetscFunctionBegin;
1256   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1257   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1258   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1259   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1260   PetscFunctionReturn(PETSC_SUCCESS);
1261 }
1262 
1263 static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
1264 {
1265   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1266 
1267   PetscFunctionBegin;
1268   /* do nondiagonal part */
1269   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1270   /* do local part */
1271   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1272   /* add partial results together */
1273   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1274   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1275   PetscFunctionReturn(PETSC_SUCCESS);
1276 }
1277 
1278 static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1279 {
1280   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1281 
1282   PetscFunctionBegin;
1283   /* do nondiagonal part */
1284   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1285   /* do local part */
1286   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1287   /* add partial results together */
1288   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1289   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1290   PetscFunctionReturn(PETSC_SUCCESS);
1291 }
1292 
1293 /*
1294   This only works correctly for square matrices where the subblock A->A is the
1295    diagonal block
1296 */
1297 static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
1298 {
1299   PetscFunctionBegin;
1300   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1301   PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
1302   PetscFunctionReturn(PETSC_SUCCESS);
1303 }
1304 
1305 static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
1306 {
1307   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1308 
1309   PetscFunctionBegin;
1310   PetscCall(MatScale(a->A, aa));
1311   PetscCall(MatScale(a->B, aa));
1312   PetscFunctionReturn(PETSC_SUCCESS);
1313 }
1314 
1315 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1316 {
1317   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
1318   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1319   PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
1320   PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
1321   PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;
1322 
1323   PetscFunctionBegin;
1324   PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
1325   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1326   mat->getrowactive = PETSC_TRUE;
1327 
1328   if (!mat->rowvalues && (idx || v)) {
1329     /*
1330         allocate enough space to hold information from the longest row.
1331     */
1332     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
1333     PetscInt     max = 1, mbs = mat->mbs, tmp;
1334     for (i = 0; i < mbs; i++) {
1335       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1336       if (max < tmp) max = tmp;
1337     }
1338     PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
1339   }
1340   lrow = row - brstart;
1341 
1342   pvA = &vworkA;
1343   pcA = &cworkA;
1344   pvB = &vworkB;
1345   pcB = &cworkB;
1346   if (!v) {
1347     pvA = NULL;
1348     pvB = NULL;
1349   }
1350   if (!idx) {
1351     pcA = NULL;
1352     if (!v) pcB = NULL;
1353   }
1354   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1355   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1356   nztot = nzA + nzB;
1357 
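  /* The merged row is produced in three pieces: the B (off-diagonal) entries
     whose global block column lies left of the diagonal block, then all A
     (diagonal-block) entries, then the remaining B entries; since the A and B
     rows are already sorted, the result is sorted by global column. */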
1358   cmap = mat->garray;
1359   if (v || idx) {
1360     if (nztot) {
1361       /* Sort by increasing column numbers, assuming A and B already sorted */
1362       PetscInt imark = -1;
1363       if (v) {
1364         *v = v_p = mat->rowvalues;
1365         for (i = 0; i < nzB; i++) {
1366           if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
1367           else break;
1368         }
1369         imark = i;
1370         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1371         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1372       }
1373       if (idx) {
1374         *idx = idx_p = mat->rowindices;
1375         if (imark > -1) {
1376           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1377         } else {
1378           for (i = 0; i < nzB; i++) {
1379             if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1380             else break;
1381           }
1382           imark = i;
1383         }
1384         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
1385         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1386       }
1387     } else {
1388       if (idx) *idx = NULL;
1389       if (v) *v = NULL;
1390     }
1391   }
1392   *nz = nztot;
1393   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1394   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1395   PetscFunctionReturn(PETSC_SUCCESS);
1396 }
1397 
1398 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1399 {
1400   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1401 
1402   PetscFunctionBegin;
1403   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
1404   baij->getrowactive = PETSC_FALSE;
1405   PetscFunctionReturn(PETSC_SUCCESS);
1406 }
1407 
1408 static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1409 {
1410   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1411 
1412   PetscFunctionBegin;
1413   PetscCall(MatZeroEntries(l->A));
1414   PetscCall(MatZeroEntries(l->B));
1415   PetscFunctionReturn(PETSC_SUCCESS);
1416 }
1417 
1418 static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1419 {
1420   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
1421   Mat            A = a->A, B = a->B;
1422   PetscLogDouble isend[5], irecv[5];
1423 
1424   PetscFunctionBegin;
1425   info->block_size = (PetscReal)matin->rmap->bs;
1426 
1427   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1428 
1429   isend[0] = info->nz_used;
1430   isend[1] = info->nz_allocated;
1431   isend[2] = info->nz_unneeded;
1432   isend[3] = info->memory;
1433   isend[4] = info->mallocs;
1434 
1435   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1436 
1437   isend[0] += info->nz_used;
1438   isend[1] += info->nz_allocated;
1439   isend[2] += info->nz_unneeded;
1440   isend[3] += info->memory;
1441   isend[4] += info->mallocs;
1442 
1443   if (flag == MAT_LOCAL) {
1444     info->nz_used      = isend[0];
1445     info->nz_allocated = isend[1];
1446     info->nz_unneeded  = isend[2];
1447     info->memory       = isend[3];
1448     info->mallocs      = isend[4];
1449   } else if (flag == MAT_GLOBAL_MAX) {
1450     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1451 
1452     info->nz_used      = irecv[0];
1453     info->nz_allocated = irecv[1];
1454     info->nz_unneeded  = irecv[2];
1455     info->memory       = irecv[3];
1456     info->mallocs      = irecv[4];
1457   } else if (flag == MAT_GLOBAL_SUM) {
1458     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1459 
1460     info->nz_used      = irecv[0];
1461     info->nz_allocated = irecv[1];
1462     info->nz_unneeded  = irecv[2];
1463     info->memory       = irecv[3];
1464     info->mallocs      = irecv[4];
1465   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1466   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1467   info->fill_ratio_needed = 0;
1468   info->factor_mallocs    = 0;
1469   PetscFunctionReturn(PETSC_SUCCESS);
1470 }
1471 
1472 static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
1473 {
1474   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1475 
1476   PetscFunctionBegin;
1477   switch (op) {
1478   case MAT_NEW_NONZERO_LOCATIONS:
1479   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1480   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1481   case MAT_KEEP_NONZERO_PATTERN:
1482   case MAT_NEW_NONZERO_LOCATION_ERR:
1483     MatCheckPreallocated(A, 1);
1484     PetscCall(MatSetOption(a->A, op, flg));
1485     PetscCall(MatSetOption(a->B, op, flg));
1486     break;
1487   case MAT_ROW_ORIENTED:
1488     MatCheckPreallocated(A, 1);
1489     a->roworiented = flg;
1490 
1491     PetscCall(MatSetOption(a->A, op, flg));
1492     PetscCall(MatSetOption(a->B, op, flg));
1493     break;
1494   case MAT_FORCE_DIAGONAL_ENTRIES:
1495   case MAT_SORTED_FULL:
1496     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1497     break;
1498   case MAT_IGNORE_OFF_PROC_ENTRIES:
1499     a->donotstash = flg;
1500     break;
1501   case MAT_USE_HASH_TABLE:
1502     a->ht_flag = flg;
1503     a->ht_fact = 1.39;
1504     break;
1505   case MAT_SYMMETRIC:
1506   case MAT_STRUCTURALLY_SYMMETRIC:
1507   case MAT_HERMITIAN:
1508   case MAT_SUBMAT_SINGLEIS:
1509   case MAT_SYMMETRY_ETERNAL:
1510   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1511   case MAT_SPD_ETERNAL:
1512     /* if the diagonal block is square it inherits some of the properties above */
1513     break;
1514   default:
1515     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op);
1516   }
1517   PetscFunctionReturn(PETSC_SUCCESS);
1518 }
1519 
1520 static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout)
1521 {
1522   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
1523   Mat_SeqBAIJ *Aloc;
1524   Mat          B;
1525   PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
1526   PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
1527   MatScalar   *a;
1528 
1529   PetscFunctionBegin;
1530   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1531   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1532     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1533     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1534     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1535     /* Do not know preallocation information, but must set block size */
1536     PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
1537   } else {
1538     B = *matout;
1539   }
1540 
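  /* The transpose is assembled block by block: rvals holds the bs global row
     indices of block row i, and each block column (bs consecutively stored
     values) is inserted as a single row `col` of B with columns rvals, so
     every bs x bs block lands transposed. */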
1541   /* copy over the A part */
1542   Aloc = (Mat_SeqBAIJ *)baij->A->data;
1543   ai   = Aloc->i;
1544   aj   = Aloc->j;
1545   a    = Aloc->a;
1546   PetscCall(PetscMalloc1(bs, &rvals));
1547 
1548   for (i = 0; i < mbs; i++) {
1549     rvals[0] = bs * (baij->rstartbs + i);
1550     for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1551     for (j = ai[i]; j < ai[i + 1]; j++) {
1552       col = (baij->cstartbs + aj[j]) * bs;
1553       for (k = 0; k < bs; k++) {
1554         PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
1555 
1556         col++;
1557         a += bs;
1558       }
1559     }
1560   }
1561   /* copy over the B part */
1562   Aloc = (Mat_SeqBAIJ *)baij->B->data;
1563   ai   = Aloc->i;
1564   aj   = Aloc->j;
1565   a    = Aloc->a;
1566   for (i = 0; i < mbs; i++) {
1567     rvals[0] = bs * (baij->rstartbs + i);
1568     for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1569     for (j = ai[i]; j < ai[i + 1]; j++) {
1570       col = baij->garray[aj[j]] * bs;
1571       for (k = 0; k < bs; k++) {
1572         PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
1573         col++;
1574         a += bs;
1575       }
1576     }
1577   }
1578   PetscCall(PetscFree(rvals));
1579   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1580   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1581 
1582   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
1583   else PetscCall(MatHeaderMerge(A, &B));
1584   PetscFunctionReturn(PETSC_SUCCESS);
1585 }
1586 
1587 static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr)
1588 {
1589   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1590   Mat          a = baij->A, b = baij->B;
1591   PetscInt     s1, s2, s3;
1592 
1593   PetscFunctionBegin;
1594   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1595   if (rr) {
1596     PetscCall(VecGetLocalSize(rr, &s1));
1597     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1598     /* Overlap communication with computation. */
1599     PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1600   }
1601   if (ll) {
1602     PetscCall(VecGetLocalSize(ll, &s1));
1603     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1604     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1605   }
1606   /* scale the diagonal block */
1607   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1608 
1609   if (rr) {
1610     /* Do a scatter end and then right scale the off-diagonal block */
1611     PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1612     PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1613   }
1614   PetscFunctionReturn(PETSC_SUCCESS);
1615 }
1616 
1617 static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
1618 {
1619   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1620   PetscInt    *lrows;
1621   PetscInt     r, len;
1622   PetscBool    cong;
1623 
1624   PetscFunctionBegin;
1625   /* get locally owned rows */
1626   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
1627   /* fix right-hand side if needed */
1628   if (x && b) {
1629     const PetscScalar *xx;
1630     PetscScalar       *bb;
1631 
1632     PetscCall(VecGetArrayRead(x, &xx));
1633     PetscCall(VecGetArray(b, &bb));
1634     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
1635     PetscCall(VecRestoreArrayRead(x, &xx));
1636     PetscCall(VecRestoreArray(b, &bb));
1637   }
1638 
1639   /* actually zap the local rows */
1640   /*
1641         Zero the required rows. If the "diagonal block" of the matrix
1642      is square and the user wishes to set the diagonal, we use separate
1643      code so that MatSetValues() is not called for each diagonal entry,
1644      which would allocate new memory with many mallocs and slow things
1645      down.
1646   */
1647   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1648   PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
1649   PetscCall(MatHasCongruentLayouts(A, &cong));
1650   if ((diag != 0.0) && cong) {
1651     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
1652   } else if (diag != 0.0) {
1653     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
1654     PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options MAT_NEW_NONZERO_LOCATIONS, MAT_NEW_NONZERO_LOCATION_ERR, and MAT_NEW_NONZERO_ALLOCATION_ERR");
1655     for (r = 0; r < len; ++r) {
1656       const PetscInt row = lrows[r] + A->rmap->rstart;
1657       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
1658     }
1659     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1660     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1661   } else {
1662     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
1663   }
1664   PetscCall(PetscFree(lrows));
1665 
1666   /* only change matrix nonzero state if pattern was allowed to be changed */
1667   if (!((Mat_SeqBAIJ *)l->A->data)->keepnonzeropattern || !((Mat_SeqBAIJ *)l->A->data)->nonew) {
1668     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1669     PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1670   }
1671   PetscFunctionReturn(PETSC_SUCCESS);
1672 }
1673 
1674 static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
1675 {
1676   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
1677   PetscMPIInt        n = A->rmap->n, p = 0;
1678   PetscInt           i, j, k, r, len = 0, row, col, count;
1679   PetscInt          *lrows, *owners = A->rmap->range;
1680   PetscSFNode       *rrows;
1681   PetscSF            sf;
1682   const PetscScalar *xx;
1683   PetscScalar       *bb, *mask;
1684   Vec                xmask, lmask;
1685   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
1686   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1687   PetscScalar       *aa;
1688 
1689   PetscFunctionBegin;
1690   /* Create SF where leaves are input rows and roots are owned rows */
1691   PetscCall(PetscMalloc1(n, &lrows));
1692   for (r = 0; r < n; ++r) lrows[r] = -1;
1693   PetscCall(PetscMalloc1(N, &rrows));
1694   for (r = 0; r < N; ++r) {
1695     const PetscInt idx = rows[r];
1696     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
1697     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
1698       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
1699     }
1700     rrows[r].rank  = p;
1701     rrows[r].index = rows[r] - owners[p];
1702   }
1703   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1704   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
1705   /* Collect flags for rows to be zeroed */
1706   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
1707   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
1708   PetscCall(PetscSFDestroy(&sf));
1709   /* Compress and put in row numbers */
1710   for (r = 0; r < n; ++r)
1711     if (lrows[r] >= 0) lrows[len++] = r;
1712   /* zero diagonal part of matrix */
1713   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
1714   /* handle off-diagonal part of matrix */
1715   PetscCall(MatCreateVecs(A, &xmask, NULL));
1716   PetscCall(VecDuplicate(l->lvec, &lmask));
1717   PetscCall(VecGetArray(xmask, &bb));
1718   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
1719   PetscCall(VecRestoreArray(xmask, &bb));
1720   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
1721   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
1722   PetscCall(VecDestroy(&xmask));
1723   if (x) {
1724     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
1725     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
1726     PetscCall(VecGetArrayRead(l->lvec, &xx));
1727     PetscCall(VecGetArray(b, &bb));
1728   }
1729   PetscCall(VecGetArray(lmask, &mask));
1730   /* remove zeroed rows of off-diagonal matrix */
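  /* Blocks are stored column-major, so aa starts at the (row % bs)-th scalar
     row of the first block of this block row and stepping by bs visits that
     scalar row across every stored block column. */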
1731   for (i = 0; i < len; ++i) {
1732     row   = lrows[i];
1733     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
1734     aa    = ((MatScalar *)baij->a) + baij->i[row / bs] * bs2 + (row % bs);
1735     for (k = 0; k < count; ++k) {
1736       aa[0] = 0.0;
1737       aa += bs;
1738     }
1739   }
1740   /* loop over all elements of the off-process part of the matrix, zeroing the removed columns */
1741   for (i = 0; i < l->B->rmap->N; ++i) {
1742     row = i / bs;
1743     for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
1744       for (k = 0; k < bs; ++k) {
1745         col = bs * baij->j[j] + k;
1746         if (PetscAbsScalar(mask[col])) {
1747           aa = ((MatScalar *)baij->a) + j * bs2 + (i % bs) + bs * k;
1748           if (x) bb[i] -= aa[0] * xx[col];
1749           aa[0] = 0.0;
1750         }
1751       }
1752     }
1753   }
1754   if (x) {
1755     PetscCall(VecRestoreArray(b, &bb));
1756     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1757   }
1758   PetscCall(VecRestoreArray(lmask, &mask));
1759   PetscCall(VecDestroy(&lmask));
1760   PetscCall(PetscFree(lrows));
1761 
1762   /* only change matrix nonzero state if pattern was allowed to be changed */
1763   if (!((Mat_SeqBAIJ *)l->A->data)->nonew) {
1764     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1765     PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1766   }
1767   PetscFunctionReturn(PETSC_SUCCESS);
1768 }
1769 
1770 static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1771 {
1772   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1773 
1774   PetscFunctionBegin;
1775   PetscCall(MatSetUnfactored(a->A));
1776   PetscFunctionReturn(PETSC_SUCCESS);
1777 }
1778 
1779 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);
1780 
1781 static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag)
1782 {
1783   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
1784   Mat          a, b, c, d;
1785   PetscBool    flg;
1786 
1787   PetscFunctionBegin;
1788   a = matA->A;
1789   b = matA->B;
1790   c = matB->A;
1791   d = matB->B;
1792 
1793   PetscCall(MatEqual(a, c, &flg));
1794   if (flg) PetscCall(MatEqual(b, d, &flg));
1795   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1796   PetscFunctionReturn(PETSC_SUCCESS);
1797 }
1798 
1799 static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str)
1800 {
1801   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1802   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
1803 
1804   PetscFunctionBegin;
1805   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1806   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1807     PetscCall(MatCopy_Basic(A, B, str));
1808   } else {
1809     PetscCall(MatCopy(a->A, b->A, str));
1810     PetscCall(MatCopy(a->B, b->B, str));
1811   }
1812   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1813   PetscFunctionReturn(PETSC_SUCCESS);
1814 }
1815 
1816 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
1817 {
1818   PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs;
1819   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
1820   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
1821 
1822   PetscFunctionBegin;
1823   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
1824   PetscFunctionReturn(PETSC_SUCCESS);
1825 }
1826 
1827 static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
1828 {
1829   Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
1830   PetscBLASInt bnz, one                         = 1;
1831   Mat_SeqBAIJ *x, *y;
1832   PetscInt     bs2 = Y->rmap->bs * Y->rmap->bs;
1833 
1834   PetscFunctionBegin;
1835   if (str == SAME_NONZERO_PATTERN) {
1836     PetscScalar alpha = a;
1837     x                 = (Mat_SeqBAIJ *)xx->A->data;
1838     y                 = (Mat_SeqBAIJ *)yy->A->data;
1839     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
1840     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
1841     x = (Mat_SeqBAIJ *)xx->B->data;
1842     y = (Mat_SeqBAIJ *)yy->B->data;
1843     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
1844     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
1845     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1846   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1847     PetscCall(MatAXPY_Basic(Y, a, X, str));
1848   } else {
1849     Mat       B;
1850     PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
1851     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
1852     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
1853     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
1854     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
1855     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
1856     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
1857     PetscCall(MatSetType(B, MATMPIBAIJ));
1858     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d));
1859     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
1860     PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
1861     /* MatAXPY_BasicWithPreallocation() for a BAIJ matrix is much slower than for AIJ, even for bs=1! */
1862     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
1863     PetscCall(MatHeaderMerge(Y, &B));
1864     PetscCall(PetscFree(nnz_d));
1865     PetscCall(PetscFree(nnz_o));
1866   }
1867   PetscFunctionReturn(PETSC_SUCCESS);
1868 }
1869 
1870 static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
1871 {
1872   PetscFunctionBegin;
1873   if (PetscDefined(USE_COMPLEX)) {
1874     Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;
1875 
1876     PetscCall(MatConjugate_SeqBAIJ(a->A));
1877     PetscCall(MatConjugate_SeqBAIJ(a->B));
1878   }
1879   PetscFunctionReturn(PETSC_SUCCESS);
1880 }
1881 
1882 static PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1883 {
1884   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1885 
1886   PetscFunctionBegin;
1887   PetscCall(MatRealPart(a->A));
1888   PetscCall(MatRealPart(a->B));
1889   PetscFunctionReturn(PETSC_SUCCESS);
1890 }
1891 
1892 static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1893 {
1894   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1895 
1896   PetscFunctionBegin;
1897   PetscCall(MatImaginaryPart(a->A));
1898   PetscCall(MatImaginaryPart(a->B));
1899   PetscFunctionReturn(PETSC_SUCCESS);
1900 }
1901 
1902 static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1903 {
1904   IS       iscol_local;
1905   PetscInt csize;
1906 
1907   PetscFunctionBegin;
1908   PetscCall(ISGetLocalSize(iscol, &csize));
1909   if (call == MAT_REUSE_MATRIX) {
1910     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1911     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1912   } else {
1913     PetscCall(ISAllGather(iscol, &iscol_local));
1914   }
1915   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE));
1916   if (call == MAT_INITIAL_MATRIX) {
1917     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1918     PetscCall(ISDestroy(&iscol_local));
1919   }
1920   PetscFunctionReturn(PETSC_SUCCESS);
1921 }
1922 
1923 /*
1924   Not great since it makes two copies of the submatrix: first a SeqBAIJ on
1925   each process, then the end result by concatenating the local matrices.
1926   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1927   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
1928 */
1929 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym)
1930 {
1931   PetscMPIInt  rank, size;
1932   PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
1933   PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
1934   Mat          M, Mreuse;
1935   MatScalar   *vwork, *aa;
1936   MPI_Comm     comm;
1937   IS           isrow_new, iscol_new;
1938   Mat_SeqBAIJ *aij;
1939 
1940   PetscFunctionBegin;
1941   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
1942   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1943   PetscCallMPI(MPI_Comm_size(comm, &size));
1944   /* The compression and expansion should be avoided: it does not report
1945      errors and it may change the indices, hence it is bug-prone */
1946   PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
1947   if (isrow == iscol) {
1948     iscol_new = isrow_new;
1949     PetscCall(PetscObjectReference((PetscObject)iscol_new));
1950   } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));
1951 
1952   if (call == MAT_REUSE_MATRIX) {
1953     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
1954     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1955     PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym));
1956   } else {
1957     PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym));
1958   }
1959   PetscCall(ISDestroy(&isrow_new));
1960   PetscCall(ISDestroy(&iscol_new));
1961   /*
1962       m - number of local rows
1963       n - number of columns (same on all processors)
1964       rstart - first row in new global matrix generated
1965   */
1966   PetscCall(MatGetBlockSize(mat, &bs));
1967   PetscCall(MatGetSize(Mreuse, &m, &n));
1968   m = m / bs;
1969   n = n / bs;
1970 
1971   if (call == MAT_INITIAL_MATRIX) {
1972     aij = (Mat_SeqBAIJ *)(Mreuse)->data;
1973     ii  = aij->i;
1974     jj  = aij->j;
1975 
1976     /*
1977         Determine the number of non-zeros in the diagonal and off-diagonal
1978         portions of the matrix in order to do correct preallocation
1979     */
1980 
1981     /* first get start and end of "diagonal" columns */
1982     if (csize == PETSC_DECIDE) {
1983       PetscCall(ISGetSize(isrow, &mglobal));
1984       if (mglobal == n * bs) { /* square matrix */
1985         nlocal = m;
1986       } else {
1987         nlocal = n / size + ((n % size) > rank);
1988       }
1989     } else {
1990       nlocal = csize / bs;
1991     }
1992     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
1993     rstart = rend - nlocal;
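    /* MPI_Scan returns the inclusive prefix sum of nlocal, so rend is one past
       this process's last owned block column and rstart is its first. */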
1994     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
1995 
1996     /* next, compute all the lengths */
1997     PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
1998     for (i = 0; i < m; i++) {
1999       jend = ii[i + 1] - ii[i];
2000       olen = 0;
2001       dlen = 0;
2002       for (j = 0; j < jend; j++) {
2003         if (*jj < rstart || *jj >= rend) olen++;
2004         else dlen++;
2005         jj++;
2006       }
2007       olens[i] = olen;
2008       dlens[i] = dlen;
2009     }
2010     PetscCall(MatCreate(comm, &M));
2011     PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
2012     PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ));
2013     PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
2014     PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
2015     PetscCall(PetscFree2(dlens, olens));
2016   } else {
2017     PetscInt ml, nl;
2018 
2019     M = *newmat;
2020     PetscCall(MatGetLocalSize(M, &ml, &nl));
2021     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
2022     PetscCall(MatZeroEntries(M));
2023     /*
2024          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2025        rather than the slower MatSetValues().
2026     */
2027     M->was_assembled = PETSC_TRUE;
2028     M->assembled     = PETSC_FALSE;
2029   }
2030   PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
2031   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
2032   aij = (Mat_SeqBAIJ *)(Mreuse)->data;
2033   ii  = aij->i;
2034   jj  = aij->j;
2035   aa  = aij->a;
2036   for (i = 0; i < m; i++) {
2037     row   = rstart / bs + i;
2038     nz    = ii[i + 1] - ii[i];
2039     cwork = jj;
2040     jj    = PetscSafePointerPlusOffset(jj, nz);
2041     vwork = aa;
2042     aa    = PetscSafePointerPlusOffset(aa, nz * bs * bs);
2043     PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
2044   }
2045 
2046   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
2047   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
2048   *newmat = M;
2049 
2050   /* save submatrix used in processor for next request */
2051   if (call == MAT_INITIAL_MATRIX) {
2052     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
2053     PetscCall(PetscObjectDereference((PetscObject)Mreuse));
2054   }
2055   PetscFunctionReturn(PETSC_SUCCESS);
2056 }
2057 
2058 static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B)
2059 {
2060   MPI_Comm        comm, pcomm;
2061   PetscInt        clocal_size, nrows;
2062   const PetscInt *rows;
2063   PetscMPIInt     size;
2064   IS              crowp, lcolp;
2065 
2066   PetscFunctionBegin;
2067   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2068   /* make a collective version of 'rowp' */
2069   PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
2070   if (pcomm == comm) {
2071     crowp = rowp;
2072   } else {
2073     PetscCall(ISGetSize(rowp, &nrows));
2074     PetscCall(ISGetIndices(rowp, &rows));
2075     PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
2076     PetscCall(ISRestoreIndices(rowp, &rows));
2077   }
2078   PetscCall(ISSetPermutation(crowp));
2079   /* make a local version of 'colp' */
2080   PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
2081   PetscCallMPI(MPI_Comm_size(pcomm, &size));
2082   if (size == 1) {
2083     lcolp = colp;
2084   } else {
2085     PetscCall(ISAllGather(colp, &lcolp));
2086   }
2087   PetscCall(ISSetPermutation(lcolp));
2088   /* now we just get the submatrix */
2089   PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
2090   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE));
2091   /* clean up */
2092   if (pcomm != comm) PetscCall(ISDestroy(&crowp));
2093   if (size > 1) PetscCall(ISDestroy(&lcolp));
2094   PetscFunctionReturn(PETSC_SUCCESS);
2095 }
2096 
2097 static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
2098 {
2099   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
2100   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
2101 
2102   PetscFunctionBegin;
2103   if (nghosts) *nghosts = B->nbs;
2104   if (ghosts) *ghosts = baij->garray;
2105   PetscFunctionReturn(PETSC_SUCCESS);
2106 }
2107 
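/*
   Builds, on every process, a sequential AIJ matrix (one entry per block)
   holding the global block nonzero structure of A: the block-row lengths are
   gathered with one Allgatherv, each process writes its slice of the column
   indices in globally sorted order, and a second Allgatherv completes b->j.
*/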
2108 static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat)
2109 {
2110   Mat          B;
2111   Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
2112   Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
2113   Mat_SeqAIJ  *b;
2114   PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
2115   PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
2116   PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;
2117 
2118   PetscFunctionBegin;
2119   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2120   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2121 
2122   /*   Tell every processor the number of nonzeros per row  */
2123   PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
2124   for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
2125   PetscCall(PetscMalloc1(2 * size, &recvcounts));
2126   displs = recvcounts + size;
2127   for (i = 0; i < size; i++) {
2128     recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs;
2129     displs[i]     = A->rmap->range[i] / bs;
2130   }
2131   PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
2132   /* Create a sequential AIJ matrix to hold the global block nonzero structure */
2133   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
2134   PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
2135   PetscCall(MatSetType(B, MATSEQAIJ));
2136   PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
2137   b = (Mat_SeqAIJ *)B->data;
2138 
2139   /*     Copy my part of matrix column indices over  */
2140   sendcount  = ad->nz + bd->nz;
2141   jsendbuf   = b->j + b->i[rstarts[rank] / bs];
2142   a_jsendbuf = ad->j;
2143   b_jsendbuf = bd->j;
2144   n          = A->rmap->rend / bs - A->rmap->rstart / bs;
2145   cnt        = 0;
2146   for (i = 0; i < n; i++) {
2147     /* put in lower diagonal portion */
2148     m = bd->i[i + 1] - bd->i[i];
2149     while (m > 0) {
2150       /* is it above diagonal (in bd (compressed) numbering) */
2151       if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
2152       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2153       m--;
2154     }
2155 
2156     /* put in diagonal portion */
2157     for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;
2158 
2159     /* put in upper diagonal portion */
2160     while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
2161   }
2162   PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);
2163 
2164   /*  Gather all column indices to all processors  */
2165   for (i = 0; i < size; i++) {
2166     recvcounts[i] = 0;
2167     for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
2168   }
2169   displs[0] = 0;
2170   for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
2171   PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
2172   /*  Assemble the matrix into usable form (note numerical values not yet set)  */
2173   /* set the b->ilen (length of each row) values */
2174   PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
2175   /* set the b->i indices */
2176   b->i[0] = 0;
2177   for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
2178   PetscCall(PetscFree(lens));
2179   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2180   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2181   PetscCall(PetscFree(recvcounts));
2182 
2183   PetscCall(MatPropagateSymmetryOptions(A, B));
2184   *newmat = B;
2185   PetscFunctionReturn(PETSC_SUCCESS);
2186 }
2187 
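/*
   Parallel SOR is a processor-block Jacobi outer iteration: each sweep
   gathers the current ghost values of xx, forms bb1 = bb - B*x_ghost, and
   then runs the requested local SOR sweep with the diagonal block A.
*/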
2188 static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
2189 {
2190   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
2191   Vec          bb1 = NULL;
2192 
2193   PetscFunctionBegin;
2194   if (flag == SOR_APPLY_UPPER) {
2195     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2196     PetscFunctionReturn(PETSC_SUCCESS);
2197   }
2198 
2199   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));
2200 
2201   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2202     if (flag & SOR_ZERO_INITIAL_GUESS) {
2203       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2204       its--;
2205     }
2206 
2207     while (its--) {
2208       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2209       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2210 
2211       /* update rhs: bb1 = bb - B*x */
2212       PetscCall(VecScale(mat->lvec, -1.0));
2213       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2214 
2215       /* local sweep */
2216       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
2217     }
2218   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2219     if (flag & SOR_ZERO_INITIAL_GUESS) {
2220       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2221       its--;
2222     }
2223     while (its--) {
2224       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2225       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2226 
2227       /* update rhs: bb1 = bb - B*x */
2228       PetscCall(VecScale(mat->lvec, -1.0));
2229       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2230 
2231       /* local sweep */
2232       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
2233     }
2234   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2235     if (flag & SOR_ZERO_INITIAL_GUESS) {
2236       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2237       its--;
2238     }
2239     while (its--) {
2240       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2241       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2242 
2243       /* update rhs: bb1 = bb - B*x */
2244       PetscCall(VecScale(mat->lvec, -1.0));
2245       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2246 
2247       /* local sweep */
2248       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
2249     }
2250   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Requested parallel version of SOR not supported");
2251 
2252   PetscCall(VecDestroy(&bb1));
2253   PetscFunctionReturn(PETSC_SUCCESS);
2254 }
2255 
2256 static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions)
2257 {
2258   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data;
2259   PetscInt     m, N, i, *garray = aij->garray;
2260   PetscInt     ib, jb, bs = A->rmap->bs;
2261   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data;
2262   MatScalar   *a_val = a_aij->a;
2263   Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data;
2264   MatScalar   *b_val = b_aij->a;
2265   PetscReal   *work;
2266 
2267   PetscFunctionBegin;
2268   PetscCall(MatGetSize(A, &m, &N));
2269   PetscCall(PetscCalloc1(N, &work));
2270   if (type == NORM_2) {
2271     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2272       for (jb = 0; jb < bs; jb++) {
2273         for (ib = 0; ib < bs; ib++) {
2274           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2275           a_val++;
2276         }
2277       }
2278     }
2279     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2280       for (jb = 0; jb < bs; jb++) {
2281         for (ib = 0; ib < bs; ib++) {
2282           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2283           b_val++;
2284         }
2285       }
2286     }
2287   } else if (type == NORM_1) {
2288     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2289       for (jb = 0; jb < bs; jb++) {
2290         for (ib = 0; ib < bs; ib++) {
2291           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2292           a_val++;
2293         }
2294       }
2295     }
2296     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2297       for (jb = 0; jb < bs; jb++) {
2298         for (ib = 0; ib < bs; ib++) {
2299           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2300           b_val++;
2301         }
2302       }
2303     }
2304   } else if (type == NORM_INFINITY) {
2305     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2306       for (jb = 0; jb < bs; jb++) {
2307         for (ib = 0; ib < bs; ib++) {
2308           PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2309           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2310           a_val++;
2311         }
2312       }
2313     }
2314     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2315       for (jb = 0; jb < bs; jb++) {
2316         for (ib = 0; ib < bs; ib++) {
2317           PetscInt col = garray[b_aij->j[i]] * bs + jb;
2318           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2319           b_val++;
2320         }
2321       }
2322     }
2323   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
2324     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2325       for (jb = 0; jb < bs; jb++) {
2326         for (ib = 0; ib < bs; ib++) {
2327           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
2328           a_val++;
2329         }
2330       }
2331     }
2332     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2333       for (jb = 0; jb < bs; jb++) {
2334         for (ib = 0; ib < bs; ib++) {
2335           work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
2336           b_val++;
2337         }
2338       }
2339     }
2340   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2341     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2342       for (jb = 0; jb < bs; jb++) {
2343         for (ib = 0; ib < bs; ib++) {
2344           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
2345           a_val++;
2346         }
2347       }
2348     }
2349     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2350       for (jb = 0; jb < bs; jb++) {
2351         for (ib = 0; ib < bs; ib++) {
2352           work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
2353           b_val++;
2354         }
2355       }
2356     }
2357   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
2358   if (type == NORM_INFINITY) {
2359     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
2360   } else {
2361     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
2362   }
2363   PetscCall(PetscFree(work));
2364   if (type == NORM_2) {
2365     for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2366   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2367     for (i = 0; i < N; i++) reductions[i] /= m;
2368   }
2369   PetscFunctionReturn(PETSC_SUCCESS);
2370 }
2371 
2372 static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values)
2373 {
2374   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2375 
2376   PetscFunctionBegin;
2377   PetscCall(MatInvertBlockDiagonal(a->A, values));
2378   A->factorerrortype             = a->A->factorerrortype;
2379   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2380   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2381   PetscFunctionReturn(PETSC_SUCCESS);
2382 }
2383 
2384 static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a)
2385 {
2386   Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
2387   Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ *)maij->A->data;
2388 
2389   PetscFunctionBegin;
2390   if (!Y->preallocated) {
2391     PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
2392   } else if (!aij->nz) {
2393     PetscInt nonew = aij->nonew;
2394     PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
2395     aij->nonew = nonew;
2396   }
2397   PetscCall(MatShift_Basic(Y, a));
2398   PetscFunctionReturn(PETSC_SUCCESS);
2399 }
2400 
2401 static PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d)
2402 {
2403   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2404 
2405   PetscFunctionBegin;
2406   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2407   PetscCall(MatMissingDiagonal(a->A, missing, d));
2408   if (d) {
2409     PetscInt rstart;
2410     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2411     *d += rstart / A->rmap->bs;
2412   }
2413   PetscFunctionReturn(PETSC_SUCCESS);
2414 }
2415 
2416 static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a)
2417 {
2418   PetscFunctionBegin;
2419   *a = ((Mat_MPIBAIJ *)A->data)->A;
2420   PetscFunctionReturn(PETSC_SUCCESS);
2421 }
2422 
2423 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep)
2424 {
2425   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2426 
2427   PetscFunctionBegin;
2428   PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2429   PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2430   PetscFunctionReturn(PETSC_SUCCESS);
2431 }
2432 
2433 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2434                                        MatGetRow_MPIBAIJ,
2435                                        MatRestoreRow_MPIBAIJ,
2436                                        MatMult_MPIBAIJ,
2437                                        /* 4*/ MatMultAdd_MPIBAIJ,
2438                                        MatMultTranspose_MPIBAIJ,
2439                                        MatMultTransposeAdd_MPIBAIJ,
2440                                        NULL,
2441                                        NULL,
2442                                        NULL,
2443                                        /*10*/ NULL,
2444                                        NULL,
2445                                        NULL,
2446                                        MatSOR_MPIBAIJ,
2447                                        MatTranspose_MPIBAIJ,
2448                                        /*15*/ MatGetInfo_MPIBAIJ,
2449                                        MatEqual_MPIBAIJ,
2450                                        MatGetDiagonal_MPIBAIJ,
2451                                        MatDiagonalScale_MPIBAIJ,
2452                                        MatNorm_MPIBAIJ,
2453                                        /*20*/ MatAssemblyBegin_MPIBAIJ,
2454                                        MatAssemblyEnd_MPIBAIJ,
2455                                        MatSetOption_MPIBAIJ,
2456                                        MatZeroEntries_MPIBAIJ,
2457                                        /*24*/ MatZeroRows_MPIBAIJ,
2458                                        NULL,
2459                                        NULL,
2460                                        NULL,
2461                                        NULL,
2462                                        /*29*/ MatSetUp_MPI_Hash,
2463                                        NULL,
2464                                        NULL,
2465                                        MatGetDiagonalBlock_MPIBAIJ,
2466                                        NULL,
2467                                        /*34*/ MatDuplicate_MPIBAIJ,
2468                                        NULL,
2469                                        NULL,
2470                                        NULL,
2471                                        NULL,
2472                                        /*39*/ MatAXPY_MPIBAIJ,
2473                                        MatCreateSubMatrices_MPIBAIJ,
2474                                        MatIncreaseOverlap_MPIBAIJ,
2475                                        MatGetValues_MPIBAIJ,
2476                                        MatCopy_MPIBAIJ,
2477                                        /*44*/ NULL,
2478                                        MatScale_MPIBAIJ,
2479                                        MatShift_MPIBAIJ,
2480                                        NULL,
2481                                        MatZeroRowsColumns_MPIBAIJ,
2482                                        /*49*/ NULL,
2483                                        NULL,
2484                                        NULL,
2485                                        NULL,
2486                                        NULL,
2487                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2488                                        NULL,
2489                                        MatSetUnfactored_MPIBAIJ,
2490                                        MatPermute_MPIBAIJ,
2491                                        MatSetValuesBlocked_MPIBAIJ,
2492                                        /*59*/ MatCreateSubMatrix_MPIBAIJ,
2493                                        MatDestroy_MPIBAIJ,
2494                                        MatView_MPIBAIJ,
2495                                        NULL,
2496                                        NULL,
2497                                        /*64*/ NULL,
2498                                        NULL,
2499                                        NULL,
2500                                        NULL,
2501                                        NULL,
2502                                        /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2503                                        NULL,
2504                                        NULL,
2505                                        NULL,
2506                                        NULL,
2507                                        /*74*/ NULL,
2508                                        MatFDColoringApply_BAIJ,
2509                                        NULL,
2510                                        NULL,
2511                                        NULL,
2512                                        /*79*/ NULL,
2513                                        NULL,
2514                                        NULL,
2515                                        NULL,
2516                                        MatLoad_MPIBAIJ,
2517                                        /*84*/ NULL,
2518                                        NULL,
2519                                        NULL,
2520                                        NULL,
2521                                        NULL,
2522                                        /*89*/ NULL,
2523                                        NULL,
2524                                        NULL,
2525                                        NULL,
2526                                        NULL,
2527                                        /*94*/ NULL,
2528                                        NULL,
2529                                        NULL,
2530                                        NULL,
2531                                        NULL,
2532                                        /*99*/ NULL,
2533                                        NULL,
2534                                        NULL,
2535                                        MatConjugate_MPIBAIJ,
2536                                        NULL,
2537                                        /*104*/ NULL,
2538                                        MatRealPart_MPIBAIJ,
2539                                        MatImaginaryPart_MPIBAIJ,
2540                                        NULL,
2541                                        NULL,
2542                                        /*109*/ NULL,
2543                                        NULL,
2544                                        NULL,
2545                                        NULL,
2546                                        MatMissingDiagonal_MPIBAIJ,
2547                                        /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ,
2548                                        NULL,
2549                                        MatGetGhosts_MPIBAIJ,
2550                                        NULL,
2551                                        NULL,
2552                                        /*119*/ NULL,
2553                                        NULL,
2554                                        NULL,
2555                                        NULL,
2556                                        MatGetMultiProcBlock_MPIBAIJ,
2557                                        /*124*/ NULL,
2558                                        MatGetColumnReductions_MPIBAIJ,
2559                                        MatInvertBlockDiagonal_MPIBAIJ,
2560                                        NULL,
2561                                        NULL,
2562                                        /*129*/ NULL,
2563                                        NULL,
2564                                        NULL,
2565                                        NULL,
2566                                        NULL,
2567                                        /*134*/ NULL,
2568                                        NULL,
2569                                        NULL,
2570                                        NULL,
2571                                        NULL,
2572                                        /*139*/ MatSetBlockSizes_Default,
2573                                        NULL,
2574                                        NULL,
2575                                        MatFDColoringSetUp_MPIXAIJ,
2576                                        NULL,
2577                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
2578                                        NULL,
2579                                        NULL,
2580                                        NULL,
2581                                        NULL,
2582                                        NULL,
2583                                        /*150*/ NULL,
2584                                        MatEliminateZeros_MPIBAIJ,
2585                                        MatGetRowSumAbs_MPIBAIJ};
2586 
2587 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
2588 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
2589 
2590 static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
2591 {
2592   PetscInt        m, rstart, cstart, cend;
2593   PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
2594   const PetscInt *JJ          = NULL;
2595   PetscScalar    *values      = NULL;
2596   PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
2597   PetscBool       nooffprocentries;
2598 
2599   PetscFunctionBegin;
2600   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
2601   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
2602   PetscCall(PetscLayoutSetUp(B->rmap));
2603   PetscCall(PetscLayoutSetUp(B->cmap));
2604   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2605   m      = B->rmap->n / bs;
2606   rstart = B->rmap->rstart / bs;
2607   cstart = B->cmap->rstart / bs;
2608   cend   = B->cmap->rend / bs;
2609 
2610   PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
2611   PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
2612   for (i = 0; i < m; i++) {
2613     nz = ii[i + 1] - ii[i];
2614     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
2615     nz_max = PetscMax(nz_max, nz);
2616     dlen   = 0;
2617     olen   = 0;
2618     JJ     = jj + ii[i];
2619     for (j = 0; j < nz; j++) {
2620       if (*JJ < cstart || *JJ >= cend) olen++;
2621       else dlen++;
2622       JJ++;
2623     }
2624     d_nnz[i] = dlen;
2625     o_nnz[i] = olen;
2626   }
2627   PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
2628   PetscCall(PetscFree2(d_nnz, o_nnz));
2629 
2630   values = (PetscScalar *)V;
2631   if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
2632   for (i = 0; i < m; i++) {
2633     PetscInt        row   = i + rstart;
2634     PetscInt        ncols = ii[i + 1] - ii[i];
2635     const PetscInt *icols = jj + ii[i];
2636     if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2637       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
2638       PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
2639     } else { /* block ordering does not match so we can only insert one block at a time. */
2640       PetscInt j;
2641       for (j = 0; j < ncols; j++) {
2642         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
2643         PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
2644       }
2645     }
2646   }
2647 
2648   if (!V) PetscCall(PetscFree(values));
2649   nooffprocentries    = B->nooffprocentries;
2650   B->nooffprocentries = PETSC_TRUE;
2651   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2652   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2653   B->nooffprocentries = nooffprocentries;
2654 
2655   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2656   PetscFunctionReturn(PETSC_SUCCESS);
2657 }
2658 
2659 /*@C
2660   MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values
2661 
2662   Collective
2663 
2664   Input Parameters:
2665 + B  - the matrix
2666 . bs - the block size
2667 . i  - the indices into `j` for the start of each local row (starts with zero)
2668 . j  - the column indices for each local row (starts with zero); these must be sorted for each row
2669 - v  - optional values in the matrix, use `NULL` if not provided
2670 
2671   Level: advanced
2672 
2673   Notes:
2674   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
2675   thus you CANNOT change the matrix entries by changing the values of `v` after you have
2676   called this routine.
2677 
2678   The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
2679   may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
2680   over rows within a block and the last index is over columns within a block.  Fortran programs will likely set
2681   `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2682   block and the second index is over columns within a block.
2683 
2684   Though this routine has Preallocation() in the name, it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well.
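
  Example Usage:
  A minimal sketch, assuming `B` is a `MATMPIBAIJ` matrix whose local part holds two block rows; the data are
  illustrative only:
.vb
  PetscInt    bs  = 2;
  PetscInt    i[] = {0, 2, 3};    /* i[row+1] - i[row] = number of blocks in that block row */
  PetscInt    j[] = {0, 1, 1};    /* sorted global block column of each block */
  PetscScalar v[3 * 2 * 2] = {0}; /* nnz blocks, each with bs*bs values; fill as needed */

  PetscCall(MatMPIBAIJSetPreallocationCSR(B, bs, i, j, v));
.ve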
2685 
2686 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ`
2687 @*/
2688 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2689 {
2690   PetscFunctionBegin;
2691   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2692   PetscValidType(B, 1);
2693   PetscValidLogicalCollectiveInt(B, bs, 2);
2694   PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
2695   PetscFunctionReturn(PETSC_SUCCESS);
2696 }
2697 
2698 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
2699 {
2700   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
2701   PetscInt     i;
2702   PetscMPIInt  size;
2703 
2704   PetscFunctionBegin;
2705   if (B->hash_active) {
2706     B->ops[0]      = b->cops;
2707     B->hash_active = PETSC_FALSE;
2708   }
2709   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
2710   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
2711   PetscCall(PetscLayoutSetUp(B->rmap));
2712   PetscCall(PetscLayoutSetUp(B->cmap));
2713   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2714 
2715   if (d_nnz) {
2716     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
2717   }
2718   if (o_nnz) {
2719     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
2720   }
2721 
2722   b->bs2 = bs * bs;
2723   b->mbs = B->rmap->n / bs;
2724   b->nbs = B->cmap->n / bs;
2725   b->Mbs = B->rmap->N / bs;
2726   b->Nbs = B->cmap->N / bs;
2727 
2728   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
2729   b->rstartbs = B->rmap->rstart / bs;
2730   b->rendbs   = B->rmap->rend / bs;
2731   b->cstartbs = B->cmap->rstart / bs;
2732   b->cendbs   = B->cmap->rend / bs;
2733 
2734 #if defined(PETSC_USE_CTABLE)
2735   PetscCall(PetscHMapIDestroy(&b->colmap));
2736 #else
2737   PetscCall(PetscFree(b->colmap));
2738 #endif
2739   PetscCall(PetscFree(b->garray));
2740   PetscCall(VecDestroy(&b->lvec));
2741   PetscCall(VecScatterDestroy(&b->Mvctx));
2742 
2743   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2744 
2745   MatSeqXAIJGetOptions_Private(b->B);
2746   PetscCall(MatDestroy(&b->B));
2747   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2748   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2749   PetscCall(MatSetType(b->B, MATSEQBAIJ));
2750   MatSeqXAIJRestoreOptions_Private(b->B);
2751 
2752   MatSeqXAIJGetOptions_Private(b->A);
2753   PetscCall(MatDestroy(&b->A));
2754   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2755   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2756   PetscCall(MatSetType(b->A, MATSEQBAIJ));
2757   MatSeqXAIJRestoreOptions_Private(b->A);
2758 
2759   PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
2760   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
2761   B->preallocated  = PETSC_TRUE;
2762   B->was_assembled = PETSC_FALSE;
2763   B->assembled     = PETSC_FALSE;
2764   PetscFunctionReturn(PETSC_SUCCESS);
2765 }
2766 
2767 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec);
2768 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal);
2769 
2770 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj)
2771 {
2772   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
2773   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data;
2774   PetscInt        M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs;
2775   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2776 
2777   PetscFunctionBegin;
2778   PetscCall(PetscMalloc1(M + 1, &ii));
2779   ii[0] = 0;
2780   for (i = 0; i < M; i++) {
2781     PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]);
2782     PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]);
2783     ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i];
2784     /* subtract one from the count if this block row contains a diagonal block */
2785     for (j = id[i]; j < id[i + 1]; j++) {
2786       if (jd[j] == i) {
2787         ii[i + 1]--;
2788         break;
2789       }
2790     }
2791   }
2792   PetscCall(PetscMalloc1(ii[M], &jj));
2793   cnt = 0;
2794   for (i = 0; i < M; i++) {
2795     for (j = io[i]; j < io[i + 1]; j++) {
2796       if (garray[jo[j]] > rstart) break;
2797       jj[cnt++] = garray[jo[j]];
2798     }
2799     for (k = id[i]; k < id[i + 1]; k++) {
2800       if (jd[k] != i) jj[cnt++] = rstart + jd[k];
2801     }
2802     for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]];
2803   }
2804   PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj));
2805   PetscFunctionReturn(PETSC_SUCCESS);
2806 }
2807 
2808 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2809 
2810 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
2811 
2812 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
2813 {
2814   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2815   Mat_MPIAIJ  *b;
2816   Mat          B;
2817 
2818   PetscFunctionBegin;
2819   PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");
2820 
2821   if (reuse == MAT_REUSE_MATRIX) {
2822     B = *newmat;
2823   } else {
2824     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
2825     PetscCall(MatSetType(B, MATMPIAIJ));
2826     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
2827     PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
2828     PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
2829     PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
2830   }
2831   b = (Mat_MPIAIJ *)B->data;
2832 
2833   if (reuse == MAT_REUSE_MATRIX) {
2834     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
2835     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
2836   } else {
2837     PetscInt   *garray = a->garray;
2838     Mat_SeqAIJ *bB;
2839     PetscInt    bs, nnz;
2840     PetscCall(MatDestroy(&b->A));
2841     PetscCall(MatDestroy(&b->B));
2842     /* just clear out the data structure */
2843     PetscCall(MatDisAssemble_MPIAIJ(B));
2844     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
2845     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));
2846 
2847     /* Global numbering for b->B columns */
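    /* each column index of b->B decomposes as j = bj*bs + br, where bj is the local block column and
       br is the column offset within the block; garray[bj] gives the global block column, so the
       global scalar column is garray[bj]*bs + br */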
2848     bB  = (Mat_SeqAIJ *)b->B->data;
2849     bs  = A->rmap->bs;
2850     nnz = bB->i[A->rmap->n];
2851     for (PetscInt k = 0; k < nnz; k++) {
2852       PetscInt bj = bB->j[k] / bs;
2853       PetscInt br = bB->j[k] % bs;
2854       bB->j[k]    = garray[bj] * bs + br;
2855     }
2856   }
2857   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2858   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2859 
2860   if (reuse == MAT_INPLACE_MATRIX) {
2861     PetscCall(MatHeaderReplace(A, &B));
2862   } else {
2863     *newmat = B;
2864   }
2865   PetscFunctionReturn(PETSC_SUCCESS);
2866 }
2867 
2868 /*MC
2869    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2870 
2871    Options Database Keys:
2872 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
2873 . -mat_block_size <bs> - set the blocksize used to store the matrix
2874 . -mat_baij_mult_version <version> - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
2875 - -mat_use_hash_table <fact> - set hash table factor
2876 
2877    Level: beginner
2878 
2879    Note:
2880     `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. In this case no
2881     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored.
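
   Example:
   A hypothetical command line (application name assumed) selecting this type for a code that calls `MatSetFromOptions()`:
.vb
   ./myapp -mat_type mpibaij -mat_block_size 3
.ve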
2882 
2883 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ`
2884 M*/
2885 
2886 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);
2887 
2888 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2889 {
2890   Mat_MPIBAIJ *b;
2891   PetscBool    flg = PETSC_FALSE;
2892 
2893   PetscFunctionBegin;
2894   PetscCall(PetscNew(&b));
2895   B->data      = (void *)b;
2896   B->ops[0]    = MatOps_Values;
2897   B->assembled = PETSC_FALSE;
2898 
2899   B->insertmode = NOT_SET_VALUES;
2900   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
2901   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2902 
2903   /* build local table of row and column ownerships */
2904   PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));
2905 
2906   /* build cache for off array entries formed */
2907   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
2908 
2909   b->donotstash  = PETSC_FALSE;
2910   b->colmap      = NULL;
2911   b->garray      = NULL;
2912   b->roworiented = PETSC_TRUE;
2913 
2914   /* stuff used in block assembly */
2915   b->barray = NULL;
2916 
2917   /* stuff used for matrix vector multiply */
2918   b->lvec  = NULL;
2919   b->Mvctx = NULL;
2920 
2921   /* stuff for MatGetRow() */
2922   b->rowindices   = NULL;
2923   b->rowvalues    = NULL;
2924   b->getrowactive = PETSC_FALSE;
2925 
2926   /* hash table stuff */
2927   b->ht           = NULL;
2928   b->hd           = NULL;
2929   b->ht_size      = 0;
2930   b->ht_flag      = PETSC_FALSE;
2931   b->ht_fact      = 0;
2932   b->ht_total_ct  = 0;
2933   b->ht_insert_ct = 0;
2934 
2935   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2936   b->ijonly = PETSC_FALSE;
2937 
2938   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
2939   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
2940   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
2941 #if defined(PETSC_HAVE_HYPRE)
2942   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
2943 #endif
2944   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
2945   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
2946   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
2947   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
2948   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
2949   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
2950   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
2951   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));
2952 
2953   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
2954   PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
2955   if (flg) {
2956     PetscReal fact = 1.39;
2957     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
2958     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
2959     if (fact <= 1.0) fact = 1.39;
2960     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
2961     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
2962   }
2963   PetscOptionsEnd();
2964   PetscFunctionReturn(PETSC_SUCCESS);
2965 }
2966 
2967 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2968 /*MC
2969    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2970 
2971    This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
2972    and `MATMPIBAIJ` otherwise.
2973 
2974    Options Database Keys:
2975 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`
2976 
2977   Level: beginner
2978 
2979 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
2980 M*/
2981 
2982 /*@C
2983   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
2984   (block compressed row).
2985 
2986   Collective
2987 
2988   Input Parameters:
2989 + B     - the matrix
2990 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2991           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2992 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2993            submatrix  (same for all local rows)
2994 . d_nnz - array containing the number of block nonzeros in the various block rows
2995            in the diagonal portion of the local submatrix (possibly different for each block
2996            row) or `NULL`.  If you plan to factor the matrix you must leave room for the diagonal entry and
2997            set it even if it is zero.
2998 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2999            submatrix (same for all local rows).
3000 - o_nnz - array containing the number of nonzeros in the various block rows of the
3001            off-diagonal portion of the local submatrix (possibly different for
3002            each block row) or `NULL`.
3003 
3004    If the *_nnz parameter is given then the *_nz parameter is ignored
3005 
3006   Options Database Keys:
3007 + -mat_block_size            - size of the blocks to use
3008 - -mat_use_hash_table <fact> - set hash table factor
3009 
3010   Level: intermediate
3011 
3012   Notes:
3013   For good matrix assembly performance
3014   the user should preallocate the matrix storage by setting the parameters
3015   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3016   performance can be increased by more than a factor of 50.
3017 
3018   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
3019   then it must be used on all processors that share the object for that argument.
3020 
3021   Storage Information:
3022   For a square global matrix we define each processor's diagonal portion
3023   to be its local rows and the corresponding columns (a square submatrix);
3024   each processor's off-diagonal portion encompasses the remainder of the
3025   local matrix (a rectangular submatrix).
3026 
3027   The user can specify preallocated storage for the diagonal part of
3028   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
3029   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3030   memory allocation.  Likewise, specify preallocated storage for the
3031   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3032 
3033   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3034   the figure below we depict these three local rows and all columns (0-11).
3035 
3036 .vb
3037            0 1 2 3 4 5 6 7 8 9 10 11
3038           --------------------------
3039    row 3  |o o o d d d o o o o  o  o
3040    row 4  |o o o d d d o o o o  o  o
3041    row 5  |o o o d d d o o o o  o  o
3042           --------------------------
3043 .ve
3044 
3045   Thus, any entries in the d locations are stored in the d (diagonal)
3046   submatrix, and any entries in the o locations are stored in the
3047   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3048   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3049 
3050   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3051   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3052   In general, for PDE problems in which most nonzeros are near the diagonal,
3053   one expects `d_nz` >> `o_nz`.
3054 
3055   You can call `MatGetInfo()` to get information on how effective the preallocation was;
3056   for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3057   You can also run with the option `-info` and look for messages with the string
3058   malloc in them to see if additional memory allocation was needed.
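
  Example Usage:
  A minimal sketch of the usual create/preallocate sequence, assuming `comm`, `m`, and `n` are already defined;
  the block size and block counts are illustrative guesses:
.vb
  Mat B;

  PetscCall(MatCreate(comm, &B));
  PetscCall(MatSetSizes(B, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(B, MATMPIBAIJ));
  PetscCall(MatMPIBAIJSetPreallocation(B, 2, 5, NULL, 2, NULL)); /* bs = 2; 5 diagonal and 2 off-diagonal blocks per block row */
.ve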
3059 
3060 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
3061 @*/
3062 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
3063 {
3064   PetscFunctionBegin;
3065   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3066   PetscValidType(B, 1);
3067   PetscValidLogicalCollectiveInt(B, bs, 2);
3068   PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
3069   PetscFunctionReturn(PETSC_SUCCESS);
3070 }
3071 
3072 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
3073 /*@C
3074   MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
3075   (block compressed row).
3076 
3077   Collective
3078 
3079   Input Parameters:
3080 + comm  - MPI communicator
3081 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3082           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3083 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
3084           This value should be the same as the local size used in creating the
3085           y vector for the matrix-vector product y = Ax.
3086 . n     - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
3087           This value should be the same as the local size used in creating the
3088           x vector for the matrix-vector product y = Ax.
3089 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3090 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3091 . d_nz  - number of nonzero blocks per block row in diagonal portion of local
3092           submatrix  (same for all local rows)
3093 . d_nnz - array containing the number of nonzero blocks in the various block rows
3094           in the diagonal portion of the local submatrix (possibly different for each block
3095           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3096           and set it even if it is zero.
3097 . o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3098           submatrix (same for all local rows).
3099 - o_nnz - array containing the number of nonzero blocks in the various block rows of the
3100           off-diagonal portion of the local submatrix (possibly different for
3101           each block row) or NULL.
3102 
3103   Output Parameter:
3104 . A - the matrix
3105 
3106   Options Database Keys:
3107 + -mat_block_size            - size of the blocks to use
3108 - -mat_use_hash_table <fact> - set hash table factor
3109 
3110   Level: intermediate
3111 
3112   Notes:
3113   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3114   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3115   [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
3116 
3117   For good matrix assembly performance
3118   the user should preallocate the matrix storage by setting the parameters
3119   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3120   performance can be increased by more than a factor of 50.
3121 
3122   If the *_nnz parameter is given then the *_nz parameter is ignored
3123 
3124   A nonzero block is any block that has 1 or more nonzeros in it.
3125 
3126   The user MUST specify either the local or global matrix dimensions
3127   (possibly both).
3128 
3129   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
3130   then it must be used on all processors that share the object for that argument.
3131 
3132   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
3133   `MatGetOwnershipRange()`,  `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
3134 
3135   Storage Information:
3136   For a square global matrix we define each processor's diagonal portion
3137   to be its local rows and the corresponding columns (a square submatrix);
3138   each processor's off-diagonal portion encompasses the remainder of the
3139   local matrix (a rectangular submatrix).
3140 
3141   The user can specify preallocated storage for the diagonal part of
3142   the local submatrix with either d_nz or d_nnz (not both).  Set
3143   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3144   memory allocation.  Likewise, specify preallocated storage for the
3145   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3146 
3147   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3148   the figure below we depict these three local rows and all columns (0-11).
3149 
3150 .vb
3151            0 1 2 3 4 5 6 7 8 9 10 11
3152           --------------------------
3153    row 3  |o o o d d d o o o o  o  o
3154    row 4  |o o o d d d o o o o  o  o
3155    row 5  |o o o d d d o o o o  o  o
3156           --------------------------
3157 .ve
3158 
3159   Thus, any entries in the d locations are stored in the d (diagonal)
3160   submatrix, and any entries in the o locations are stored in the
3161   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3162   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3163 
3164   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3165   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3166   In general, for PDE problems in which most nonzeros are near the diagonal,
3167   one expects `d_nz` >> `o_nz`.
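
  Example Usage:
  A minimal sketch, assuming `comm`, `M`, and `N` are already defined; the block size and block counts are illustrative:
.vb
  Mat A;

  PetscCall(MatCreateBAIJ(comm, 3, PETSC_DECIDE, PETSC_DECIDE, M, N, 4, NULL, 1, NULL, &A));
.ve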
3168 
3169 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`,
3170           `MatGetOwnershipRange()`,  `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout`
3171 @*/
3172 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
3173 {
3174   PetscMPIInt size;
3175 
3176   PetscFunctionBegin;
3177   PetscCall(MatCreate(comm, A));
3178   PetscCall(MatSetSizes(*A, m, n, M, N));
3179   PetscCallMPI(MPI_Comm_size(comm, &size));
3180   if (size > 1) {
3181     PetscCall(MatSetType(*A, MATMPIBAIJ));
3182     PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
3183   } else {
3184     PetscCall(MatSetType(*A, MATSEQBAIJ));
3185     PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
3186   }
3187   PetscFunctionReturn(PETSC_SUCCESS);
3188 }
3189 
3190 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
3191 {
3192   Mat          mat;
3193   Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
3194   PetscInt     len = 0;
3195 
3196   PetscFunctionBegin;
3197   *newmat = NULL;
3198   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
3199   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
3200   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
3201 
3202   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
3203   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
3204   if (matin->hash_active) {
3205     PetscCall(MatSetUp(mat));
3206   } else {
3207     mat->factortype   = matin->factortype;
3208     mat->preallocated = PETSC_TRUE;
3209     mat->assembled    = PETSC_TRUE;
3210     mat->insertmode   = NOT_SET_VALUES;
3211 
3212     a             = (Mat_MPIBAIJ *)mat->data;
3213     mat->rmap->bs = matin->rmap->bs;
3214     a->bs2        = oldmat->bs2;
3215     a->mbs        = oldmat->mbs;
3216     a->nbs        = oldmat->nbs;
3217     a->Mbs        = oldmat->Mbs;
3218     a->Nbs        = oldmat->Nbs;
3219 
3220     a->size         = oldmat->size;
3221     a->rank         = oldmat->rank;
3222     a->donotstash   = oldmat->donotstash;
3223     a->roworiented  = oldmat->roworiented;
3224     a->rowindices   = NULL;
3225     a->rowvalues    = NULL;
3226     a->getrowactive = PETSC_FALSE;
3227     a->barray       = NULL;
3228     a->rstartbs     = oldmat->rstartbs;
3229     a->rendbs       = oldmat->rendbs;
3230     a->cstartbs     = oldmat->cstartbs;
3231     a->cendbs       = oldmat->cendbs;
3232 
3233     /* hash table stuff */
3234     a->ht           = NULL;
3235     a->hd           = NULL;
3236     a->ht_size      = 0;
3237     a->ht_flag      = oldmat->ht_flag;
3238     a->ht_fact      = oldmat->ht_fact;
3239     a->ht_total_ct  = 0;
3240     a->ht_insert_ct = 0;
3241 
3242     PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
3243     if (oldmat->colmap) {
3244 #if defined(PETSC_USE_CTABLE)
3245       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3246 #else
3247       PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
3248       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
3249 #endif
3250     } else a->colmap = NULL;
3251 
3252     if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) {
3253       PetscCall(PetscMalloc1(len, &a->garray));
3254       PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3255     } else a->garray = NULL;
3256 
3257     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
3258     PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3259     PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
3260 
3261     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3262     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3263   }
3264   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3265   *newmat = mat;
3266   PetscFunctionReturn(PETSC_SUCCESS);
3267 }
3268 
3269 /* Used for both MPIBAIJ and MPISBAIJ matrices */
3270 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
3271 {
3272   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3273   PetscInt    *rowidxs, *colidxs, rs, cs, ce;
3274   PetscScalar *matvals;
3275 
3276   PetscFunctionBegin;
3277   PetscCall(PetscViewerSetUp(viewer));
3278 
3279   /* read in matrix header */
3280   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3281   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3282   M  = header[1];
3283   N  = header[2];
3284   nz = header[3];
3285   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3286   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3287   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");
3288 
3289   /* set block sizes from the viewer's .info file */
3290   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3291   /* set local sizes if not set already */
3292   if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
3293   if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
3294   /* set global sizes if not set already */
3295   if (mat->rmap->N < 0) mat->rmap->N = M;
3296   if (mat->cmap->N < 0) mat->cmap->N = N;
3297   PetscCall(PetscLayoutSetUp(mat->rmap));
3298   PetscCall(PetscLayoutSetUp(mat->cmap));
3299 
3300   /* check if the matrix sizes are correct */
3301   PetscCall(MatGetSize(mat, &rows, &cols));
3302   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3303   PetscCall(MatGetBlockSize(mat, &bs));
3304   PetscCall(MatGetLocalSize(mat, &m, &n));
3305   PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
3306   PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
3307   mbs = m / bs;
3308   nbs = n / bs;
3309 
3310   /* read in row lengths and build row indices */
3311   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3312   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3313   rowidxs[0] = 0;
3314   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3315   PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3316   PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3317 
3318   /* read in column indices and matrix values */
3319   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3320   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3321   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3322 
3323   {                /* preallocate matrix storage */
3324     PetscBT    bt; /* helper bit set to count diagonal nonzeros */
3325     PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
3326     PetscBool  sbaij, done;
3327     PetscInt  *d_nnz, *o_nnz;
3328 
3329     PetscCall(PetscBTCreate(nbs, &bt));
3330     PetscCall(PetscHSetICreate(&ht));
3331     PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
3332     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
3333     for (i = 0; i < mbs; i++) {
3334       PetscCall(PetscBTMemzero(nbs, bt));
3335       PetscCall(PetscHSetIClear(ht));
3336       for (k = 0; k < bs; k++) {
3337         PetscInt row = bs * i + k;
3338         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3339           PetscInt col = colidxs[j];
3340           if (!sbaij || col >= row) {
3341             if (col >= cs && col < ce) {
3342               if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
3343             } else {
3344               PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
3345               if (done) o_nnz[i]++;
3346             }
3347           }
3348         }
3349       }
3350     }
3351     PetscCall(PetscBTDestroy(&bt));
3352     PetscCall(PetscHSetIDestroy(&ht));
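    /* both preallocation calls go through PetscTryMethod(), so only the one matching the actual matrix type (BAIJ or SBAIJ) takes effect */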
3353     PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
3354     PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
3355     PetscCall(PetscFree2(d_nnz, o_nnz));
3356   }
3357 
3358   /* store matrix values */
3359   for (i = 0; i < m; i++) {
3360     PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
3361     PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
3362   }
3363 
3364   PetscCall(PetscFree(rowidxs));
3365   PetscCall(PetscFree2(colidxs, matvals));
3366   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3367   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3368   PetscFunctionReturn(PETSC_SUCCESS);
3369 }
3370 
3371 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
3372 {
3373   PetscBool isbinary;
3374 
3375   PetscFunctionBegin;
3376   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3377   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3378   PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
3379   PetscFunctionReturn(PETSC_SUCCESS);
3380 }
3381 
3382 /*@
3383   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrix's hash table
3384 
3385   Input Parameters:
3386 + mat  - the matrix
3387 - fact - factor
3388 
3389   Options Database Key:
3390 . -mat_use_hash_table <fact> - provide the factor
3391 
3392   Level: advanced
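
  Example Usage:
  A sketch for a `MATMPIBAIJ` matrix `A`; the factor value is illustrative:
.vb
  PetscCall(MatSetOption(A, MAT_USE_HASH_TABLE, PETSC_TRUE));
  PetscCall(MatMPIBAIJSetHashTableFactor(A, 1.6));
.ve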
3393 
3394 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()`
3395 @*/
3396 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact)
3397 {
3398   PetscFunctionBegin;
3399   PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
3400   PetscFunctionReturn(PETSC_SUCCESS);
3401 }
3402 
3403 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact)
3404 {
3405   Mat_MPIBAIJ *baij;
3406 
3407   PetscFunctionBegin;
3408   baij          = (Mat_MPIBAIJ *)mat->data;
3409   baij->ht_fact = fact;
3410   PetscFunctionReturn(PETSC_SUCCESS);
3411 }
3412 
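/* Gives access to the internal diagonal (Ad) and off-diagonal (Ao) SeqBAIJ blocks of a MATMPIBAIJ matrix,
   plus the mapping (colmap) from local block columns of Ao to global block columns */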
3413 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
3414 {
3415   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3416   PetscBool    flg;
3417 
3418   PetscFunctionBegin;
3419   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
3420   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
3421   if (Ad) *Ad = a->A;
3422   if (Ao) *Ao = a->B;
3423   if (colmap) *colmap = a->garray;
3424   PetscFunctionReturn(PETSC_SUCCESS);
3425 }
3426 
3427 /*
3428     Special version for direct calls from Fortran (to eliminate two levels of function-call overhead)
3429 */
3430 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3431   #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3432 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3433   #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3434 #endif
3435 
3436 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name
3437 /*@C
3438   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`
3439 
3440   Collective
3441 
3442   Input Parameters:
3443 + matin  - the matrix
3444 . min    - number of input rows
3445 . im     - input rows
3446 . nin    - number of input columns
3447 . in     - input columns
3448 . v      - numerical values input
3449 - addvin - `INSERT_VALUES` or `ADD_VALUES`
3450 
3451   Level: advanced
3452 
3453   Developer Notes:
3454   This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.
3455 
3456 .seealso: `Mat`, `MatSetValuesBlocked()`
3457 @*/
3458 PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
3459 {
3460   /* convert input arguments to C version */
3461   Mat        mat = *matin;
3462   PetscInt   m = *min, n = *nin;
3463   InsertMode addv = *addvin;
3464 
3465   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ *)mat->data;
3466   const MatScalar *value;
3467   MatScalar       *barray      = baij->barray;
3468   PetscBool        roworiented = baij->roworiented;
3469   PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
3470   PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
3471   PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
3472 
3473   PetscFunctionBegin;
3474   /* tasks normally handled by MatSetValuesBlocked() */
3475   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3476   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
3477   PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
3478   if (mat->assembled) {
3479     mat->was_assembled = PETSC_TRUE;
3480     mat->assembled     = PETSC_FALSE;
3481   }
3482   PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));
3483 
3484   if (!barray) {
3485     PetscCall(PetscMalloc1(bs2, &barray));
3486     baij->barray = barray;
3487   }
3488 
3489   if (roworiented) stepval = (n - 1) * bs;
3490   else stepval = (m - 1) * bs;
3491 
3492   for (i = 0; i < m; i++) {
3493     if (im[i] < 0) continue;
3494     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
3495     if (im[i] >= rstart && im[i] < rend) {
3496       row = im[i] - rstart;
3497       for (j = 0; j < n; j++) {
3498         /* If NumCol = 1 then a copy is not required */
3499         if ((roworiented) && (n == 1)) {
3500           barray = (MatScalar *)v + i * bs2;
3501         } else if ((!roworiented) && (m == 1)) {
3502           barray = (MatScalar *)v + j * bs2;
3503         } else { /* Here a copy is required */
3504           if (roworiented) {
3505             value = v + i * (stepval + bs) * bs + j * bs;
3506           } else {
3507             value = v + j * (stepval + bs) * bs + i * bs;
3508           }
3509           for (ii = 0; ii < bs; ii++, value += stepval) {
3510             for (jj = 0; jj < bs; jj++) *barray++ = *value++;
3511           }
3512           barray -= bs2;
3513         }
3514 
3515         if (in[j] >= cstart && in[j] < cend) {
3516           col = in[j] - cstart;
3517           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
3518         } else if (in[j] < 0) {
3519           continue;
3520         } else {
3521           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
3522           if (mat->was_assembled) {
3523             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
3524 
3525 #if defined(PETSC_USE_DEBUG)
3526   #if defined(PETSC_USE_CTABLE)
3527             {
3528               PetscInt data;
3529               PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
3530               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
3531             }
3532   #else
3533             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
3534   #endif
3535 #endif
3536 #if defined(PETSC_USE_CTABLE)
3537             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
3538             col = (col - 1) / bs;
3539 #else
3540             col = (baij->colmap[in[j]] - 1) / bs;
3541 #endif
3542             if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
3543               PetscCall(MatDisAssemble_MPIBAIJ(mat));
3544               col = in[j];
3545             }
3546           } else col = in[j];
3547           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
3548         }
3549       }
3550     } else {
3551       if (!baij->donotstash) {
3552         if (roworiented) {
3553           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3554         } else {
3555           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3556         }
3557       }
3558     }
3559   }
3560 
3561   /* task normally handled by MatSetValuesBlocked() */
3562   PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
3563   PetscFunctionReturn(PETSC_SUCCESS);
3564 }
3565 
3566 /*@
3567   MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain the matrix in standard block CSR format for the local rows.
3568 
3569   Collective
3570 
3571   Input Parameters:
3572 + comm - MPI communicator
3573 . bs   - the block size, only a block size of 1 is supported
3574 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
3575 . n    - number of local columns; this value should be the same as the local size used in creating the
3576          x vector for the matrix-vector product $ y = Ax $ (or `PETSC_DECIDE` to have it
3577          calculated if `N` is given). For square matrices `n` is almost always `m`.
3578 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
3579 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
3580 . i    - row indices; that is, i[0] = 0 and i[row] = i[row-1] + the number of blocks in block row row-1 of the matrix
3581 . j    - column indices
3582 - a    - matrix values
3583 
3584   Output Parameter:
3585 . mat - the matrix
3586 
3587   Level: intermediate
3588 
3589   Notes:
3590   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
3591   thus you CANNOT change the matrix entries by changing the values of a[] after you have
3592   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3593 
3594   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3595   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3596   block, followed by the second column of the first block, and so on.  That is, the blocks are contiguous in memory
3597   with column-major ordering within blocks.
3598 
3599   The `i` and `j` indices are 0 based, and the entries of `i` are offsets into the local `j` array.
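
  Example Usage:
  A minimal sketch on one process (values are illustrative), using the only supported block size of 1:
.vb
  Mat         A;
  PetscInt    i[] = {0, 2, 3};        /* two local rows with 2 and 1 entries */
  PetscInt    j[] = {0, 1, 1};        /* column indices */
  PetscScalar a[] = {4.0, -1.0, 4.0}; /* one value per block since bs = 1 */

  PetscCall(MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD, 1, 2, 2, PETSC_DETERMINE, PETSC_DETERMINE, i, j, a, &A));
.ve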
3600 
3601 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
3602           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
3603 @*/
3604 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
3605 {
3606   PetscFunctionBegin;
3607   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3608   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
3609   PetscCall(MatCreate(comm, mat));
3610   PetscCall(MatSetSizes(*mat, m, n, M, N));
3611   PetscCall(MatSetType(*mat, MATMPIBAIJ));
3612   PetscCall(MatSetBlockSize(*mat, bs));
3613   PetscCall(MatSetUp(*mat));
3614   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
3615   PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
3616   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
3617   PetscFunctionReturn(PETSC_SUCCESS);
3618 }
3619 
3620 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3621 {
3622   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
3623   PetscInt    *indx;
3624   PetscScalar *values;
3625 
3626   PetscFunctionBegin;
3627   PetscCall(MatGetSize(inmat, &m, &N));
3628   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3629     Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data;
3630     PetscInt    *dnz, *onz, mbs, Nbs, nbs;
3631     PetscInt    *bindx, rmax = a->rmax, j;
3632     PetscMPIInt  rank, size;
3633 
3634     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3635     mbs = m / bs;
3636     Nbs = N / cbs;
3637     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
3638     nbs = n / cbs;
3639 
3640     PetscCall(PetscMalloc1(rmax, &bindx));
3641     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
3642 
3643     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3644     PetscCallMPI(MPI_Comm_size(comm, &size));
3645     if (rank == size - 1) {
3646       /* Check sum(nbs) = Nbs */
3647       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
3648     }
3649 
3650     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
3651     for (i = 0; i < mbs; i++) {
3652       PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
3653       nnz = nnz / bs;
3654       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
3655       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
3656       PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL));
3657     }
3658     PetscCall(PetscFree(bindx));
3659 
3660     PetscCall(MatCreate(comm, outmat));
3661     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
3662     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
3663     PetscCall(MatSetType(*outmat, MATBAIJ));
3664     PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz));
3665     PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
3666     MatPreallocateEnd(dnz, onz);
3667     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3668   }
3669 
3670   /* numeric phase */
3671   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3672   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
3673 
3674   for (i = 0; i < m; i++) {
3675     PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3676     Ii = i + rstart;
3677     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
3678     PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3679   }
3680   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
3681   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
3682   PetscFunctionReturn(PETSC_SUCCESS);
3683 }
3684