xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision d8a51d2aac522cef49bfbd84fd4cf473dc3fa6ee)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I  "petscmat.h"  I*/
2 
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
8 {
9   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
10 
11   PetscFunctionBegin;
12   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
13   PetscCall(MatStashDestroy_Private(&mat->stash));
14   PetscCall(MatStashDestroy_Private(&mat->bstash));
15   PetscCall(MatDestroy(&baij->A));
16   PetscCall(MatDestroy(&baij->B));
17 #if defined(PETSC_USE_CTABLE)
18   PetscCall(PetscHMapIDestroy(&baij->colmap));
19 #else
20   PetscCall(PetscFree(baij->colmap));
21 #endif
22   PetscCall(PetscFree(baij->garray));
23   PetscCall(VecDestroy(&baij->lvec));
24   PetscCall(VecScatterDestroy(&baij->Mvctx));
25   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
26   PetscCall(PetscFree(baij->barray));
27   PetscCall(PetscFree2(baij->hd, baij->ht));
28   PetscCall(PetscFree(baij->rangebs));
29   PetscCall(PetscFree(mat->data));
30 
31   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
32   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
33   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
34   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
35   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
36   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
37   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
38   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
39   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
40   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
41 #if defined(PETSC_HAVE_HYPRE)
42   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
43 #endif
44   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
45   PetscFunctionReturn(PETSC_SUCCESS);
46 }
47 
48 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and  MatAssemblyEnd_MPI_Hash() */
49 #define TYPE BAIJ
50 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
51 #undef TYPE
52 
53 #if defined(PETSC_HAVE_HYPRE)
54 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
55 #endif
56 
57 static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[])
58 {
59   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
60   PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
61   PetscScalar       *va, *vv;
62   Vec                vB, vA;
63   const PetscScalar *vb;
64 
65   PetscFunctionBegin;
66   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
67   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
68 
69   PetscCall(VecGetArrayWrite(vA, &va));
70   if (idx) {
71     for (i = 0; i < m; i++) {
72       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
73     }
74   }
75 
76   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
77   PetscCall(PetscMalloc1(m, &idxb));
78   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
79 
80   PetscCall(VecGetArrayWrite(v, &vv));
81   PetscCall(VecGetArrayRead(vB, &vb));
82   for (i = 0; i < m; i++) {
83     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
84       vv[i] = vb[i];
85       if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
86     } else {
87       vv[i] = va[i];
88       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
89     }
90   }
91   PetscCall(VecRestoreArrayWrite(vA, &vv));
92   PetscCall(VecRestoreArrayWrite(vA, &va));
93   PetscCall(VecRestoreArrayRead(vB, &vb));
94   PetscCall(PetscFree(idxb));
95   PetscCall(VecDestroy(&vA));
96   PetscCall(VecDestroy(&vB));
97   PetscFunctionReturn(PETSC_SUCCESS);
98 }
99 
100 static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
101 {
102   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
103 
104   PetscFunctionBegin;
105   PetscCall(MatStoreValues(aij->A));
106   PetscCall(MatStoreValues(aij->B));
107   PetscFunctionReturn(PETSC_SUCCESS);
108 }
109 
110 static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
111 {
112   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
113 
114   PetscFunctionBegin;
115   PetscCall(MatRetrieveValues(aij->A));
116   PetscCall(MatRetrieveValues(aij->B));
117   PetscFunctionReturn(PETSC_SUCCESS);
118 }
119 
120 /*
121      Local utility routine that creates a mapping from the global column
122    number to the local number in the off-diagonal part of the local
123    storage of the matrix.  This is done in a non scalable way since the
124    length of colmap equals the global matrix length.
125 */
126 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
127 {
128   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
129   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
130   PetscInt     nbs = B->nbs, i, bs = mat->rmap->bs;
131 
132   PetscFunctionBegin;
133 #if defined(PETSC_USE_CTABLE)
134   PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap));
135   for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1));
136 #else
137   PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
138   for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
139 #endif
140   PetscFunctionReturn(PETSC_SUCCESS);
141 }
142 
/*
  MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) -

  Inserts (INSERT_VALUES) or adds (ADD_VALUES) the scalar `value` at point
  position (row, col) of the local diagonal block A.  The point indices are
  converted to a block position (brow, bcol) plus offsets (ridx, cidx) inside
  the bs x bs block.  A short binary search narrows the candidate range of
  block columns, then a linear scan locates (or finds the insertion point of)
  bcol.  If the block column is absent and a->nonew allows new nonzeros,
  storage is grown via MatSeqXAIJReallocateAIJ() and later entries of the row
  are shifted up to make room for a zeroed new block.

  (orow, ocol) are the original global indices, used only in error messages.
  The macro relies on many locals of the enclosing function (rp, ap, aa, ai,
  aj, aimax, ailen, bs, bs2, low, high, t, _i, N, bap, ...) and on the
  a_noinsert label being unique within that function.
*/
#define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = PetscSafePointerPlusOffset(aj, ai[brow]); \
    ap   = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  } while (0)
182 
/*
  MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) -

  Identical in structure to MatSetValues_SeqBAIJ_A_Private() but operates on
  the off-diagonal block B (locals rp/ap come from bj/ba, limits from
  bimax/bilen, new-nonzero policy from b->nonew, and the skip label is
  b_noinsert).  See the A variant for a description of the algorithm.
*/
#define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = PetscSafePointerPlusOffset(bj, bi[brow]); \
    ap   = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column  (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  } while (0)
222 
/*
   MatSetValues_MPIBAIJ - Point-wise (non-blocked) insertion/addition of values.

   Locally owned rows are routed either to the diagonal block A (column inside
   the local column range) or to the off-diagonal block B via the two
   ..._Private macros above; rows owned by other processes are accumulated in
   mat->stash for communication at assembly time.  Negative row or column
   indices are silently skipped (standard MatSetValues() convention).
*/
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  MatScalar    value;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)(A)->data;
  PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar   *aa = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are ignored by convention */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* locally owned row */
      row = im[i] - rstart_orig;
      for (j = 0; j < n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* column in local range: diagonal block A */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue; /* negative columns are ignored by convention */
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            /* after assembly B uses compacted local column numbering; translate via colmap */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
            col = col - 1; /* colmap stores position+1; 0 means "block column not present" */
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
              /* new off-diagonal block column: revert B to global numbering and insert there */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)(B)->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else {
              PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
              col += in[j] % bs; /* add the point offset within the bs-wide block column */
            }
          } else col = in[j]; /* before first assembly B still uses global column numbering */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else { /* row owned by another process: stash for assembly-time communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
308 
/*
   MatSetValuesBlocked_SeqBAIJ_Inlined - Inserts or adds one bs x bs block of
   values v[] at block position (row, col) of a SeqBAIJ matrix.

   v[] holds bs*bs values: row-major when a->roworiented, column-major
   otherwise; internally blocks are stored column-major, hence the two copy
   orders below.  If the block column is not present and a->nonew permits,
   storage is grown and later blocks in the row are shifted up.  (orow, ocol)
   are the caller's original global block indices, used only in error messages.
*/
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];       /* block-column indices of this block row */
  ap    = aa + bs2 * ai[row]; /* values of this block row */
  rmax  = imax[row];          /* allocated blocks in this row */
  nrow  = ailen[row];         /* blocks currently in use */
  value = v;
  low   = 0;
  high  = nrow;
  /* binary search down to a short range, then linear scan for col */
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) { /* block exists: copy/accumulate values in place */
      bap = ap + bs2 * i;
      if (roworiented) {
        /* v is row-major, storage is column-major: stride by bs */
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        /* v already column-major: contiguous copy per column */
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2; /* silently ignore new nonzero blocks */
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  /* fill the freshly opened slot (no accumulation: the slot is new) */
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}
385 
/*
    MatSetValuesBlocked_MPIBAIJ - Block-indexed insertion/addition of values.

    Each (im[i], in[j]) pair addresses one bs x bs block.  Locally owned block
    rows go to the diagonal block A or the off-diagonal block B via
    MatSetValuesBlocked_SeqBAIJ_Inlined(); off-process block rows are stashed
    in mat->bstash for assembly-time communication.

    This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
    by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
*/
static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ *)mat->data;
  const PetscScalar *value;
  MatScalar         *barray      = baij->barray; /* bs2-sized scratch block, lazily allocated */
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* distance (in scalars) between consecutive rows/columns of one block inside v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative block rows are ignored by convention */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) { /* locally owned block row */
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2; /* block already contiguous in v */
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i * (stepval + bs) + j) * bs;
          } else {
            value = v + (j * (stepval + bs) + i) * bs;
          }
          /* gather the strided bs x bs block into the contiguous scratch buffer */
          for (ii = 0; ii < bs; ii++, value += bs + stepval) {
            for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2; /* rewind to the start of the scratch block */
        }

        if (in[j] >= cstart && in[j] < cend) { /* diagonal block A */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue; /* negative block columns are ignored by convention */
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            /* after assembly B uses compacted local block-column numbering; translate via colmap */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
  #endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = (col - 1) / bs; /* colmap stores point position+1; convert to block index */
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
              /* new off-diagonal block column: revert B to global numbering and insert there */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
          } else col = in[j]; /* before first assembly B still uses global numbering */
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else { /* block row owned by another process: stash for assembly */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
481 
/* Multiplicative (Fibonacci) hashing: HASH_KEY is the fractional part of the
   golden ratio.  HASH(size, key, tmp) maps key into [0, size); tmp must be a
   PetscReal lvalue used as scratch (the macro evaluates key only once). */
#define HASH_KEY             0.6180339887
#define HASH(size, key, tmp) (tmp = (key)*HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/*
   MatSetValues_MPIBAIJ_HT - Point-wise insertion using the hash-table fast
   path (MAT_USE_HASH_TABLE).  The nonzero pattern is fixed: each existing
   block is found by hashing its (block row, block column) key into baij->ht
   with linear probing (wrapping once); inserting a position absent from the
   table is an error.  Off-process rows are stashed as usual.  In debug builds
   probe counts are accumulated into ht_total_ct/ht_insert_ct.
*/
static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
  PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
  PetscReal    tmp;
  MatScalar  **HD       = baij->hd, value;
  PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) { /* locally owned point row */
      for (j = 0; j < n; j++) {
        col = in[j];
        if (roworiented) value = v[i * n + j];
        else value = v[i + j * m];
        /* Look up into the hash table: key encodes (block row, block column), shifted by 1 so 0 means empty */
        key = (row / bs) * Nbs + (col / bs) + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          /* same linear probe as below, but counting probes for statistics */
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
              ;
            if (idx == size) {
              /* wrap around once from the start of the table */
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
                ;
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          /* linear probe; wrap once; failure means the position is not preallocated */
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
            ;
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
              ;
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        /* A HASH table entry is found, so insert the values at the correct address */
        /* HD[idx] points at the bs x bs block (column-major); offset to the point entry */
        if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
        else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
      }
    } else if (!baij->donotstash) { /* off-process row: stash for assembly */
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
554 
/*
   MatSetValuesBlocked_MPIBAIJ_HT - Block-indexed insertion using the
   hash-table fast path (MAT_USE_HASH_TABLE).  Each (block row, block column)
   key is hashed into baij->ht with linear probing (wrapping once); HD[idx]
   points at the bs x bs block (stored column-major).  Off-process block rows
   are stashed in mat->bstash.  The nonzero pattern is fixed; positions absent
   from the table are an error.
*/
static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col;
  PetscInt           rstart = baij->rstartbs;
  /* NOTE(review): rstart is in block rows but rend is taken from the point-row
     map (mat->rmap->rend, >= baij->rendbs) — confirm whether baij->rendbs was
     intended; as written, some off-process block rows pass the local test. */
  PetscInt           rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
  PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
  PetscReal          tmp;
  MatScalar        **HD = baij->hd, *baij_a;
  const PetscScalar *v_t, *value;
  PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  /* distance (in scalars) between consecutive rows/columns of one block inside v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
      PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    }
    row = im[i];
    v_t = v + i * nbs2; /* start of block row i's values (row-oriented layout) */
    if (row >= rstart && row < rend) {
      for (j = 0; j < n; j++) {
        col = in[j];

        /* Look up into the Hash Table: key is shifted by 1 so 0 means empty */
        key = row * Nbs + col + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          /* same linear probe as below, but counting probes for statistics */
          total_ct++;
          insert_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
              ;
            if (idx == size) {
              /* wrap around once from the start of the table */
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
                ;
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
            ;
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
              ;
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        baij_a = HD[idx]; /* destination bs x bs block, column-major */
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t += bs;
          /* v row-major, storage column-major: scatter with stride bs */
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
            }
          }
        } else {
          value = v + j * (stepval + bs) * bs + i * bs;
          /* v column-major: copy column by column */
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
            }
          }
        }
      }
    } else { /* off-process block row: stash for assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
653 
/*
   MatGetValues_MPIBAIJ - Retrieves point-wise values at (idxm[i], idxn[j])
   into v[i*n + j] (row-major).  Only rows owned by this process may be
   queried; negative row/column indices are skipped.  Columns in the local
   range are read from the diagonal block A; other columns are translated
   through colmap/garray and read from the off-diagonal block B, returning 0.0
   for positions outside B's nonzero block structure.
*/
static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          /* column in local range: read from diagonal block A */
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          /* off-diagonal column: translate global block column to local position in B */
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
          data--; /* colmap stores position+1; -1 now means "block column not present" */
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          /* absent block column (or stale colmap entry): the value is structurally zero */
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
691 
/*
  MatNorm_MPIBAIJ - Computes a norm of a parallel BAIJ matrix.

  Supports NORM_FROBENIUS, NORM_1 (max column sum) and NORM_INFINITY (max row
  sum).  The local contribution from the diagonal (A) and off-diagonal (B)
  blocks is computed first, then combined across ranks with an Allreduce.
*/
static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
  PetscReal    sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    /* uniprocessor: the entire matrix lives in the "diagonal" block A */
    PetscCall(MatNorm(baij->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over every stored scalar entry of both blocks,
         reduce the partial sums, then take the square root */
      v  = amat->a;
      nz = amat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v  = bmat->a;
      nz = bmat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      /* tmp[] accumulates this rank's contribution to every global column sum;
         tmp2[] receives the reduced result.
         NOTE(review): cstart is taken from rstartbs, which equals the column
         block start only when row and column layouts coincide (square
         matrices) -- confirm for rectangular use; cstartbs would be expected. */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = baij->garray, cstart = baij->rstartbs;
      PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N, &tmp2));
      v  = amat->a;
      jj = amat->j;
      for (i = 0; i < amat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * (cstart + *jj) + j; /* column index */
          for (row = 0; row < bs; row++) {
            /* blocks are stored column by column, so v walks one block column here */
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      /* same accumulation for the off-diagonal block; garray maps local
         block columns of B to global block columns */
      v  = bmat->a;
      jj = bmat->j;
      for (i = 0; i < bmat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * garray[*jj] + j;
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      /* the 1-norm is the largest of the global column sums */
      *nrm = 0.0;
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
    } else if (type == NORM_INFINITY) { /* max row sum */
      /* sums[] holds the running absolute row sums of the bs point rows of
         the current block row, accumulated over both A and B */
      PetscReal *sums;
      PetscCall(PetscMalloc1(bs, &sums));
      sum = 0.0;
      for (j = 0; j < amat->mbs; j++) {
        for (row = 0; row < bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2 * amat->i[j];
        nz = amat->i[j + 1] - amat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        v  = bmat->a + bs2 * bmat->i[j];
        nz = bmat->i[j + 1] - bmat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        for (row = 0; row < bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      /* infinity norm = global maximum of all row sums */
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscFree(sums));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
791 
792 /*
793   Creates the hash table, and sets the table
794   This table is created only once.
  If new entries need to be added to the matrix
796   then the hash table has to be destroyed and
797   recreated.
798 */
static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          A = baij->A, B = baij->B;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
  PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
  PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
  PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
  PetscInt    *HT, key;
  MatScalar  **HD;
  PetscReal    tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct = 0, max = 0; /* probe statistics: total collisions and longest probe chain */
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS); /* table already exists; it is built only once */

  /* table size is a factor of the nonzero-block count, to keep the load factor below 1 */
  baij->ht_size = (PetscInt)(factor * nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
  HD = baij->hd; /* HD[slot] points at the bs2 values of the stored block */
  HT = baij->ht; /* HT[slot] is the key; 0 (from calloc) marks an empty slot */

  /* Loop Over A */
  for (i = 0; i < a->mbs; i++) {
    for (j = ai[i]; j < ai[i + 1]; j++) {
      row = i + rstart;
      col = aj[j] + cstart;

      /* key encodes the global (block row, block col); +1 keeps 0 reserved for "empty" */
      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      /* linear probing from the hashed slot, wrapping around the table */
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = a->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i = 0; i < b->mbs; i++) {
    for (j = bi[i]; j < bi[i + 1]; j++) {
      row = i + rstart;
      col = garray[bj[j]]; /* garray maps local block columns of B to global block columns */
      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = b->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i = 0, j = 0; i < ht_size; i++) {
    if (HT[i]) j++;
  }
  PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max));
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
882 
883 static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
884 {
885   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
886   PetscInt     nstash, reallocs;
887 
888   PetscFunctionBegin;
889   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
890 
891   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
892   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
893   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
894   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
895   PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
896   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
897   PetscFunctionReturn(PETSC_SUCCESS);
898 }
899 
/*
  Completes assembly: drains the scalar and block stashes into the local
  blocks, assembles A and B, keeps the ranks consistent with respect to
  disassembly of the off-diagonal part, optionally installs the hash-table
  based MatSetValues kernels, and updates the global nonzero state.
*/
static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
  PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt    *row, *col;
  PetscBool    r1, r2, r3, other_disassembled;
  MatScalar   *val;
  PetscMPIInt  n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* receive and insert the scalar (point) stash entries, message by message */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break; /* no more incoming messages */

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the row-oriented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented                             = PETSC_FALSE;
    a->roworiented                                = PETSC_FALSE;
    (((Mat_SeqBAIJ *)baij->B->data))->roworiented = PETSC_FALSE;
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* each stashed block carries bs2 scalars, hence the val + i * bs2 offset */
        PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented                           = r1;
    a->roworiented                              = r2;
    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3;
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    /* report and reset hash-table search statistics gathered during MatSetValues */
    PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    /* build the hash table on the first final assembly and switch to the
       hash-table based insertion kernels */
    PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* MatGetRow work space is invalidated by assembly */
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1006 
1007 extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer);
1008 #include <petscdraw.h>
/*
  Viewer backend for ASCII, draw and socket viewers.  The ASCII "info"
  formats print statistics directly; otherwise the entire matrix is gathered
  onto rank 0 as a temporary MPIBAIJ matrix and viewed sequentially there.
*/
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ      *baij = (Mat_MPIBAIJ *)mat->data;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank nonzero/memory statistics only; no entries are printed */
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, (double)info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      /* nothing to print for this format */
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    /* NOTE(review): rank 0 must be able to hold the whole matrix in memory */
    Mat          A;
    Mat_SeqBAIJ *Aloc;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      /* rvals holds the bs consecutive global point rows of block row i */
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          /* insert one block column (bs values) at a time */
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = baij->garray[aj[j]] * bs; /* garray maps local block column of B to global */
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      /* rank 0 owns every row of the gathered matrix, so viewing its local
         diagonal block shows the entire matrix */
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)(A->data))->A, matname));
      PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1129 
/* Used for both MPIBAIJ and MPISBAIJ matrices */
/*
  Writes the matrix in PETSc binary format: a 4-entry header, per-point-row
  lengths, global column indices, and values.  Within each point row the
  entries are emitted as B-entries left of the diagonal block, then A, then
  the remaining B-entries, giving increasing global column order (assuming
  the rows of A and B are individually sorted).
*/
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
  Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt        header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l;
  PetscInt64      nz, hnz;
  PetscInt       *rowlens, *colidxs;
  PetscScalar    *matvals;
  PetscMPIInt     rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs * bs * (A->nz + B->nz); /* number of scalar (point) nonzeros on this rank */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  /* the global point-nonzero count is only needed (and stored) on rank 0 */
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (cnt = 0, i = 0; i < A->mbs; i++)
    /* every point row of a block row has the same number of entries */
    for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      /* B-entries left of the diagonal block first ... */
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
      }
      /* ... then the diagonal block A (shifted by the column start cs) ... */
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
      /* ... then the remaining B-entries */
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      /* same traversal order as the column indices above; within a block the
         indexing bs * (bs * jb + l) + k picks row k of block column l */
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
    }
  }
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1209 
1210 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
1211 {
1212   PetscBool iascii, isdraw, issocket, isbinary;
1213 
1214   PetscFunctionBegin;
1215   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1216   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1217   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1218   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1219   if (iascii || isdraw || issocket) {
1220     PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
1221   } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
1222   PetscFunctionReturn(PETSC_SUCCESS);
1223 }
1224 
/*
  y = A*x: overlaps the scatter of the off-process entries of x (into the
  ghost vector a->lvec) with the multiply by the local diagonal block, then
  adds the off-diagonal contribution.
*/
static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  PetscInt     nt;

  PetscFunctionBegin;
  /* sanity-check that the vector layouts match the matrix layouts */
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
  PetscCall(VecGetLocalSize(yy, &nt));
  PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy");
  /* start the ghost-value scatter, multiply by the diagonal block while the
     communication is in flight, then finish with the off-diagonal block */
  PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->mult)(a->A, xx, yy));
  PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1241 
1242 static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1243 {
1244   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1245 
1246   PetscFunctionBegin;
1247   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1248   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1249   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1250   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1251   PetscFunctionReturn(PETSC_SUCCESS);
1252 }
1253 
1254 static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
1255 {
1256   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1257 
1258   PetscFunctionBegin;
1259   /* do nondiagonal part */
1260   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1261   /* do local part */
1262   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1263   /* add partial results together */
1264   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1265   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1266   PetscFunctionReturn(PETSC_SUCCESS);
1267 }
1268 
1269 static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1270 {
1271   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1272 
1273   PetscFunctionBegin;
1274   /* do nondiagonal part */
1275   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1276   /* do local part */
1277   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1278   /* add partial results together */
1279   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1280   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1281   PetscFunctionReturn(PETSC_SUCCESS);
1282 }
1283 
1284 /*
1285   This only works correctly for square matrices where the subblock A->A is the
1286    diagonal block
1287 */
1288 static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
1289 {
1290   PetscFunctionBegin;
1291   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1292   PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
1293   PetscFunctionReturn(PETSC_SUCCESS);
1294 }
1295 
1296 static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
1297 {
1298   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1299 
1300   PetscFunctionBegin;
1301   PetscCall(MatScale(a->A, aa));
1302   PetscCall(MatScale(a->B, aa));
1303   PetscFunctionReturn(PETSC_SUCCESS);
1304 }
1305 
/*
  Returns global row `row` (which must be owned by this rank) with global
  column indices, merging the diagonal (A) and off-diagonal (B) blocks so the
  columns come out in increasing order (assuming each block's rows are
  sorted).  MatRestoreRow_MPIBAIJ() must be called afterwards.
*/
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;

  PetscFunctionBegin;
  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt     max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      /* tmp = number of nonzero blocks in block row i, over A and B */
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }
  lrow = row - brstart; /* local point row */

  /* only request from A/B what the caller actually asked for */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray; /* local block column of B -> global block column */
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1; /* number of B entries left of the diagonal block */
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        /* merged order: B-left, then all of A, then B-right */
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          /* values were not requested, so imark must be determined here */
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1388 
1389 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1390 {
1391   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1392 
1393   PetscFunctionBegin;
1394   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
1395   baij->getrowactive = PETSC_FALSE;
1396   PetscFunctionReturn(PETSC_SUCCESS);
1397 }
1398 
1399 static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1400 {
1401   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1402 
1403   PetscFunctionBegin;
1404   PetscCall(MatZeroEntries(l->A));
1405   PetscCall(MatZeroEntries(l->B));
1406   PetscFunctionReturn(PETSC_SUCCESS);
1407 }
1408 
1409 static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1410 {
1411   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
1412   Mat            A = a->A, B = a->B;
1413   PetscLogDouble isend[5], irecv[5];
1414 
1415   PetscFunctionBegin;
1416   info->block_size = (PetscReal)matin->rmap->bs;
1417 
1418   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1419 
1420   isend[0] = info->nz_used;
1421   isend[1] = info->nz_allocated;
1422   isend[2] = info->nz_unneeded;
1423   isend[3] = info->memory;
1424   isend[4] = info->mallocs;
1425 
1426   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1427 
1428   isend[0] += info->nz_used;
1429   isend[1] += info->nz_allocated;
1430   isend[2] += info->nz_unneeded;
1431   isend[3] += info->memory;
1432   isend[4] += info->mallocs;
1433 
1434   if (flag == MAT_LOCAL) {
1435     info->nz_used      = isend[0];
1436     info->nz_allocated = isend[1];
1437     info->nz_unneeded  = isend[2];
1438     info->memory       = isend[3];
1439     info->mallocs      = isend[4];
1440   } else if (flag == MAT_GLOBAL_MAX) {
1441     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1442 
1443     info->nz_used      = irecv[0];
1444     info->nz_allocated = irecv[1];
1445     info->nz_unneeded  = irecv[2];
1446     info->memory       = irecv[3];
1447     info->mallocs      = irecv[4];
1448   } else if (flag == MAT_GLOBAL_SUM) {
1449     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1450 
1451     info->nz_used      = irecv[0];
1452     info->nz_allocated = irecv[1];
1453     info->nz_unneeded  = irecv[2];
1454     info->memory       = irecv[3];
1455     info->mallocs      = irecv[4];
1456   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1457   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1458   info->fill_ratio_needed = 0;
1459   info->factor_mallocs    = 0;
1460   PetscFunctionReturn(PETSC_SUCCESS);
1461 }
1462 
/*
  Handles matrix options for MPIBAIJ; most options are forwarded to both the
  diagonal (a->A) and off-diagonal (a->B) sequential blocks.
*/
static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    /* nonzero-pattern related options apply to both blocks */
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    /* orientation is also remembered locally; MatSetValues uses it */
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* when set, entries destined for other ranks are dropped instead of stashed */
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    a->ht_fact = 1.39; /* default hash-table size factor */
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1510 
/*
  Forms the transpose of the parallel BAIJ matrix A.  The rows of the local
  diagonal (baij->A) and off-diagonal (baij->B) parts are inserted block column
  by block column into a new (or reused, or in-place merged) matrix whose
  row/column layout is that of A swapped.  No preallocation information is
  known for the transpose, so the initial creation relies on
  MatSetValues_MPIBAIJ() to allocate as needed.
*/
static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *Aloc;
  Mat          B;
  PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
  PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
  MatScalar   *a;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    /* Do not know preallocation information, but must set block size */
    PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ *)baij->A->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  PetscCall(PetscMalloc1(bs, &rvals));

  for (i = 0; i < mbs; i++) {
    /* rvals[] = global point row numbers of block row i; they become the column
       indices of the inserted entries in the transpose B */
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      /* col = first global point column of this block in A = row of B */
      col = (baij->cstartbs + aj[j]) * bs;
      for (k = 0; k < bs; k++) {
        /* insert one point row of B per iteration, consuming bs values of the
           block storage each time */
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));

        col++;
        a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ *)baij->B->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      /* off-diagonal block columns are stored compressed; garray[] maps the
         compressed index back to the global block column number */
      col = baij->garray[aj[j]] * bs;
      for (k = 0; k < bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
        col++;
        a += bs;
      }
    }
  }
  PetscCall(PetscFree(rvals));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  /* for MAT_INPLACE_MATRIX, replace A's content with the assembled transpose */
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else PetscCall(MatHeaderMerge(A, &B));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1577 
/*
  Scales the matrix as mat = diag(ll) * mat * diag(rr); either vector may be NULL.
  Left scaling touches only locally owned rows, so it applies directly to both
  local blocks.  Right scaling of the off-diagonal block needs the ghost entries
  of rr, which are gathered with the matrix's scatter context; the communication
  is overlapped with the scaling of the diagonal block.
*/
static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          a = baij->A, b = baij->B;
  PetscInt     s1, s2, s3;

  PetscFunctionBegin;
  /* s2/s3 = local row/column sizes used to validate the vector sizes below */
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    /* left-scale the off-diagonal block (rows are local, no communication needed) */
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale  the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1607 
/*
  Zeros the given global rows of the matrix, optionally placing `diag` on the
  diagonal of each zeroed row, and (when x and b are both given) adjusts the
  right-hand side to b[row] = diag * x[row] so an existing solution stays
  consistent.  The rows[] list may reference rows owned by other processes;
  MatZeroRowsMapLocal_Private() maps it to the locally owned subset lrows[].
*/
static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
  PetscInt    *lrows;
  PetscInt     r, len;
  PetscBool    cong;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  if ((diag != 0.0) && cong) {
    /* row and column layouts match: the diagonal entry lies in l->A, so the
       sequential routine can set it directly */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
  } else if (diag != 0.0) {
    /* non-congruent layouts: the "diagonal" entry may live off-process, so it
       must be inserted with MatSetValues(), which requires a mutable pattern */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
    PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
       MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  } else {
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern || !((Mat_SeqBAIJ *)(l->A->data))->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1665 
/*
  Zeros both the rows AND the columns listed in rows[] (global numbering),
  optionally placing `diag` on the diagonal and adjusting b so that an existing
  solution x stays consistent.  The diagonal block is handled by the sequential
  MatZeroRowsColumns(); the off-diagonal block is zeroed by direct manipulation
  of its compressed block storage, using a scattered 0/1 mask vector to identify
  which ghost columns must be eliminated.
*/
static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n, p = 0;
  PetscInt           i, j, k, r, len = 0, row, col, count;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask;
  Vec                xmask, lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed; after the reduction lrows[r] >= 0
     marks local row r as requested by some process */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off-diagonal part of matrix */
  /* build a mask vector with 1 in every zeroed row, then scatter it so each
     process learns which of its ghost columns are being eliminated */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x) {
    /* ghost values of x are needed to update b for the eliminated columns */
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off-diagonal matrix */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    /* count = number of point entries of point row `row` within its block row;
       stepping aa by bs walks along that point row inside consecutive blocks */
    count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
    aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa += bs;
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i / bs;
    for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs * baij->j[j] + k;
        if (PetscAbsScalar(mask[col])) {
          aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k;
          /* move the eliminated column's contribution to the rhs before zeroing */
          if (x) bb[i] -= aa[0] * xx[col];
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)(l->A->data))->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1761 
1762 static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1763 {
1764   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1765 
1766   PetscFunctionBegin;
1767   PetscCall(MatSetUnfactored(a->A));
1768   PetscFunctionReturn(PETSC_SUCCESS);
1769 }
1770 
1771 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);
1772 
1773 static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag)
1774 {
1775   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
1776   Mat          a, b, c, d;
1777   PetscBool    flg;
1778 
1779   PetscFunctionBegin;
1780   a = matA->A;
1781   b = matA->B;
1782   c = matB->A;
1783   d = matB->B;
1784 
1785   PetscCall(MatEqual(a, c, &flg));
1786   if (flg) PetscCall(MatEqual(b, d, &flg));
1787   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1788   PetscFunctionReturn(PETSC_SUCCESS);
1789 }
1790 
1791 static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str)
1792 {
1793   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1794   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
1795 
1796   PetscFunctionBegin;
1797   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1798   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1799     PetscCall(MatCopy_Basic(A, B, str));
1800   } else {
1801     PetscCall(MatCopy(a->A, b->A, str));
1802     PetscCall(MatCopy(a->B, b->B, str));
1803   }
1804   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1805   PetscFunctionReturn(PETSC_SUCCESS);
1806 }
1807 
1808 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
1809 {
1810   PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs;
1811   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
1812   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
1813 
1814   PetscFunctionBegin;
1815   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
1816   PetscFunctionReturn(PETSC_SUCCESS);
1817 }
1818 
/*
  Computes Y = a*X + Y.  With identical nonzero patterns the addition is a pair
  of raw BLAS axpy calls over the stored block values; with a subset pattern it
  defers to MatAXPY_Basic(); otherwise a new matrix with the merged pattern is
  preallocated, filled, and merged back into Y.
*/
static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
  PetscBLASInt bnz, one                         = 1;
  Mat_SeqBAIJ *x, *y;
  PetscInt     bs2 = Y->rmap->bs * Y->rmap->bs;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* same pattern: add the value arrays of the diagonal blocks directly */
    x                 = (Mat_SeqBAIJ *)xx->A->data;
    y                 = (Mat_SeqBAIJ *)yy->A->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    /* then the off-diagonal blocks */
    x = (Mat_SeqBAIJ *)xx->B->data;
    y = (Mat_SeqBAIJ *)yy->B->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    /* unknown relationship between the patterns: build a new matrix whose
       pattern is the union, preallocate it exactly, then merge it into Y */
    Mat       B;
    PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
    PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
    PetscCall(MatSetType(B, MATMPIBAIJ));
    PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
    /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1861 
1862 static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
1863 {
1864   PetscFunctionBegin;
1865   if (PetscDefined(USE_COMPLEX)) {
1866     Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;
1867 
1868     PetscCall(MatConjugate_SeqBAIJ(a->A));
1869     PetscCall(MatConjugate_SeqBAIJ(a->B));
1870   }
1871   PetscFunctionReturn(PETSC_SUCCESS);
1872 }
1873 
1874 static PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1875 {
1876   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1877 
1878   PetscFunctionBegin;
1879   PetscCall(MatRealPart(a->A));
1880   PetscCall(MatRealPart(a->B));
1881   PetscFunctionReturn(PETSC_SUCCESS);
1882 }
1883 
1884 static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1885 {
1886   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1887 
1888   PetscFunctionBegin;
1889   PetscCall(MatImaginaryPart(a->A));
1890   PetscCall(MatImaginaryPart(a->B));
1891   PetscFunctionReturn(PETSC_SUCCESS);
1892 }
1893 
1894 static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1895 {
1896   IS       iscol_local;
1897   PetscInt csize;
1898 
1899   PetscFunctionBegin;
1900   PetscCall(ISGetLocalSize(iscol, &csize));
1901   if (call == MAT_REUSE_MATRIX) {
1902     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1903     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1904   } else {
1905     PetscCall(ISAllGather(iscol, &iscol_local));
1906   }
1907   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE));
1908   if (call == MAT_INITIAL_MATRIX) {
1909     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1910     PetscCall(ISDestroy(&iscol_local));
1911   }
1912   PetscFunctionReturn(PETSC_SUCCESS);
1913 }
1914 
1915 /*
1916   Not great since it makes two copies of the submatrix, first an SeqBAIJ
1917   in local and then by concatenating the local matrices the end result.
1918   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1919   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
1920 */
1921 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym)
1922 {
1923   PetscMPIInt  rank, size;
1924   PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
1925   PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
1926   Mat          M, Mreuse;
1927   MatScalar   *vwork, *aa;
1928   MPI_Comm     comm;
1929   IS           isrow_new, iscol_new;
1930   Mat_SeqBAIJ *aij;
1931 
1932   PetscFunctionBegin;
1933   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
1934   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1935   PetscCallMPI(MPI_Comm_size(comm, &size));
1936   /* The compression and expansion should be avoided. Doesn't point
1937      out errors, might change the indices, hence buggey */
1938   PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
1939   if (isrow == iscol) {
1940     iscol_new = isrow_new;
1941     PetscCall(PetscObjectReference((PetscObject)iscol_new));
1942   } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));
1943 
1944   if (call == MAT_REUSE_MATRIX) {
1945     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
1946     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1947     PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym));
1948   } else {
1949     PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym));
1950   }
1951   PetscCall(ISDestroy(&isrow_new));
1952   PetscCall(ISDestroy(&iscol_new));
1953   /*
1954       m - number of local rows
1955       n - number of columns (same on all processors)
1956       rstart - first row in new global matrix generated
1957   */
1958   PetscCall(MatGetBlockSize(mat, &bs));
1959   PetscCall(MatGetSize(Mreuse, &m, &n));
1960   m = m / bs;
1961   n = n / bs;
1962 
1963   if (call == MAT_INITIAL_MATRIX) {
1964     aij = (Mat_SeqBAIJ *)(Mreuse)->data;
1965     ii  = aij->i;
1966     jj  = aij->j;
1967 
1968     /*
1969         Determine the number of non-zeros in the diagonal and off-diagonal
1970         portions of the matrix in order to do correct preallocation
1971     */
1972 
1973     /* first get start and end of "diagonal" columns */
1974     if (csize == PETSC_DECIDE) {
1975       PetscCall(ISGetSize(isrow, &mglobal));
1976       if (mglobal == n * bs) { /* square matrix */
1977         nlocal = m;
1978       } else {
1979         nlocal = n / size + ((n % size) > rank);
1980       }
1981     } else {
1982       nlocal = csize / bs;
1983     }
1984     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
1985     rstart = rend - nlocal;
1986     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
1987 
1988     /* next, compute all the lengths */
1989     PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
1990     for (i = 0; i < m; i++) {
1991       jend = ii[i + 1] - ii[i];
1992       olen = 0;
1993       dlen = 0;
1994       for (j = 0; j < jend; j++) {
1995         if (*jj < rstart || *jj >= rend) olen++;
1996         else dlen++;
1997         jj++;
1998       }
1999       olens[i] = olen;
2000       dlens[i] = dlen;
2001     }
2002     PetscCall(MatCreate(comm, &M));
2003     PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
2004     PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ));
2005     PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
2006     PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
2007     PetscCall(PetscFree2(dlens, olens));
2008   } else {
2009     PetscInt ml, nl;
2010 
2011     M = *newmat;
2012     PetscCall(MatGetLocalSize(M, &ml, &nl));
2013     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
2014     PetscCall(MatZeroEntries(M));
2015     /*
2016          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2017        rather than the slower MatSetValues().
2018     */
2019     M->was_assembled = PETSC_TRUE;
2020     M->assembled     = PETSC_FALSE;
2021   }
2022   PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
2023   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
2024   aij = (Mat_SeqBAIJ *)(Mreuse)->data;
2025   ii  = aij->i;
2026   jj  = aij->j;
2027   aa  = aij->a;
2028   for (i = 0; i < m; i++) {
2029     row   = rstart / bs + i;
2030     nz    = ii[i + 1] - ii[i];
2031     cwork = jj;
2032     jj    = PetscSafePointerPlusOffset(jj, nz);
2033     vwork = aa;
2034     aa    = PetscSafePointerPlusOffset(aa, nz * bs * bs);
2035     PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
2036   }
2037 
2038   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
2039   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
2040   *newmat = M;
2041 
2042   /* save submatrix used in processor for next request */
2043   if (call == MAT_INITIAL_MATRIX) {
2044     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
2045     PetscCall(PetscObjectDereference((PetscObject)Mreuse));
2046   }
2047   PetscFunctionReturn(PETSC_SUCCESS);
2048 }
2049 
2050 static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B)
2051 {
2052   MPI_Comm        comm, pcomm;
2053   PetscInt        clocal_size, nrows;
2054   const PetscInt *rows;
2055   PetscMPIInt     size;
2056   IS              crowp, lcolp;
2057 
2058   PetscFunctionBegin;
2059   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2060   /* make a collective version of 'rowp' */
2061   PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
2062   if (pcomm == comm) {
2063     crowp = rowp;
2064   } else {
2065     PetscCall(ISGetSize(rowp, &nrows));
2066     PetscCall(ISGetIndices(rowp, &rows));
2067     PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
2068     PetscCall(ISRestoreIndices(rowp, &rows));
2069   }
2070   PetscCall(ISSetPermutation(crowp));
2071   /* make a local version of 'colp' */
2072   PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
2073   PetscCallMPI(MPI_Comm_size(pcomm, &size));
2074   if (size == 1) {
2075     lcolp = colp;
2076   } else {
2077     PetscCall(ISAllGather(colp, &lcolp));
2078   }
2079   PetscCall(ISSetPermutation(lcolp));
2080   /* now we just get the submatrix */
2081   PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
2082   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE));
2083   /* clean up */
2084   if (pcomm != comm) PetscCall(ISDestroy(&crowp));
2085   if (size > 1) PetscCall(ISDestroy(&lcolp));
2086   PetscFunctionReturn(PETSC_SUCCESS);
2087 }
2088 
2089 static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
2090 {
2091   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
2092   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
2093 
2094   PetscFunctionBegin;
2095   if (nghosts) *nghosts = B->nbs;
2096   if (ghosts) *ghosts = baij->garray;
2097   PetscFunctionReturn(PETSC_SUCCESS);
2098 }
2099 
/*
  Builds, on every process, a sequential AIJ matrix holding the full (global)
  block nonzero structure of A (one entry per block; numerical values are not
  set).  Row lengths and column indices are exchanged with in-place
  MPI_Allgatherv calls; each process writes its own rows directly into the
  shared receive buffers.
*/
static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat)
{
  Mat          B;
  Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
  Mat_SeqAIJ  *b;
  PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
  PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
  PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));

  /*   Tell every processor the number of nonzeros per row  */
  /* lens[i] = number of nonzero blocks in global block row i (diag + off-diag) */
  PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
  for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
  /* one allocation holds both recvcounts and displs (block-row counts/offsets) */
  PetscCall(PetscMalloc1(2 * size, &recvcounts));
  displs = recvcounts + size;
  for (i = 0; i < size; i++) {
    recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs;
    displs[i]     = A->rmap->range[i] / bs;
  }
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /* Create the sequential matrix of the same type as the local block diagonal  */
  PetscCall(MatCreate(PETSC_COMM_SELF, &B));
  PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(B, MATSEQAIJ));
  PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
  b = (Mat_SeqAIJ *)B->data;

  /*     Copy my part of matrix column indices over  */
  /* write directly into B's j array at this rank's block-row offset, merging
     off-diagonal (below/above diagonal, via garray) and diagonal indices so the
     columns of each row come out sorted */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank] / bs];
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend / bs - A->rmap->rstart / bs;
  cnt        = 0;
  for (i = 0; i < n; i++) {
    /* put in lower diagonal portion */
    m = bd->i[i + 1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;

    /* put in upper diagonal portion */
    while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
  }
  PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);

  /*  Gather all column indices to all processors  */
  /* recompute counts/offsets in units of nonzeros (per process) rather than rows */
  for (i = 0; i < size; i++) {
    recvcounts[i] = 0;
    for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
  }
  displs[0] = 0;
  for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /*  Assemble the matrix into usable form (note numerical values not yet set)  */
  /* set the b->ilen (length of each row) values */
  PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
  /* set the b->i indices */
  b->i[0] = 0;
  for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
  PetscCall(PetscFree(lens));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscFree(recvcounts));

  PetscCall(MatPropagateSymmetryOptions(A, B));
  *newmat = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2179 
2180 static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
2181 {
2182   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
2183   Vec          bb1 = NULL;
2184 
2185   PetscFunctionBegin;
2186   if (flag == SOR_APPLY_UPPER) {
2187     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2188     PetscFunctionReturn(PETSC_SUCCESS);
2189   }
2190 
2191   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));
2192 
2193   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2194     if (flag & SOR_ZERO_INITIAL_GUESS) {
2195       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2196       its--;
2197     }
2198 
2199     while (its--) {
2200       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2201       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2202 
2203       /* update rhs: bb1 = bb - B*x */
2204       PetscCall(VecScale(mat->lvec, -1.0));
2205       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2206 
2207       /* local sweep */
2208       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
2209     }
2210   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2211     if (flag & SOR_ZERO_INITIAL_GUESS) {
2212       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2213       its--;
2214     }
2215     while (its--) {
2216       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2217       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2218 
2219       /* update rhs: bb1 = bb - B*x */
2220       PetscCall(VecScale(mat->lvec, -1.0));
2221       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2222 
2223       /* local sweep */
2224       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
2225     }
2226   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2227     if (flag & SOR_ZERO_INITIAL_GUESS) {
2228       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2229       its--;
2230     }
2231     while (its--) {
2232       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2233       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2234 
2235       /* update rhs: bb1 = bb - B*x */
2236       PetscCall(VecScale(mat->lvec, -1.0));
2237       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2238 
2239       /* local sweep */
2240       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
2241     }
2242   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported");
2243 
2244   PetscCall(VecDestroy(&bb1));
2245   PetscFunctionReturn(PETSC_SUCCESS);
2246 }
2247 
2248 static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions)
2249 {
2250   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data;
2251   PetscInt     m, N, i, *garray = aij->garray;
2252   PetscInt     ib, jb, bs = A->rmap->bs;
2253   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data;
2254   MatScalar   *a_val = a_aij->a;
2255   Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data;
2256   MatScalar   *b_val = b_aij->a;
2257   PetscReal   *work;
2258 
2259   PetscFunctionBegin;
2260   PetscCall(MatGetSize(A, &m, &N));
2261   PetscCall(PetscCalloc1(N, &work));
2262   if (type == NORM_2) {
2263     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2264       for (jb = 0; jb < bs; jb++) {
2265         for (ib = 0; ib < bs; ib++) {
2266           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2267           a_val++;
2268         }
2269       }
2270     }
2271     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2272       for (jb = 0; jb < bs; jb++) {
2273         for (ib = 0; ib < bs; ib++) {
2274           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2275           b_val++;
2276         }
2277       }
2278     }
2279   } else if (type == NORM_1) {
2280     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2281       for (jb = 0; jb < bs; jb++) {
2282         for (ib = 0; ib < bs; ib++) {
2283           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2284           a_val++;
2285         }
2286       }
2287     }
2288     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2289       for (jb = 0; jb < bs; jb++) {
2290         for (ib = 0; ib < bs; ib++) {
2291           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2292           b_val++;
2293         }
2294       }
2295     }
2296   } else if (type == NORM_INFINITY) {
2297     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2298       for (jb = 0; jb < bs; jb++) {
2299         for (ib = 0; ib < bs; ib++) {
2300           int col   = A->cmap->rstart + a_aij->j[i] * bs + jb;
2301           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2302           a_val++;
2303         }
2304       }
2305     }
2306     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2307       for (jb = 0; jb < bs; jb++) {
2308         for (ib = 0; ib < bs; ib++) {
2309           int col   = garray[b_aij->j[i]] * bs + jb;
2310           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2311           b_val++;
2312         }
2313       }
2314     }
2315   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
2316     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2317       for (jb = 0; jb < bs; jb++) {
2318         for (ib = 0; ib < bs; ib++) {
2319           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
2320           a_val++;
2321         }
2322       }
2323     }
2324     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2325       for (jb = 0; jb < bs; jb++) {
2326         for (ib = 0; ib < bs; ib++) {
2327           work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
2328           b_val++;
2329         }
2330       }
2331     }
2332   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2333     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2334       for (jb = 0; jb < bs; jb++) {
2335         for (ib = 0; ib < bs; ib++) {
2336           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
2337           a_val++;
2338         }
2339       }
2340     }
2341     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2342       for (jb = 0; jb < bs; jb++) {
2343         for (ib = 0; ib < bs; ib++) {
2344           work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
2345           b_val++;
2346         }
2347       }
2348     }
2349   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
2350   if (type == NORM_INFINITY) {
2351     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
2352   } else {
2353     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
2354   }
2355   PetscCall(PetscFree(work));
2356   if (type == NORM_2) {
2357     for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2358   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2359     for (i = 0; i < N; i++) reductions[i] /= m;
2360   }
2361   PetscFunctionReturn(PETSC_SUCCESS);
2362 }
2363 
2364 static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values)
2365 {
2366   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2367 
2368   PetscFunctionBegin;
2369   PetscCall(MatInvertBlockDiagonal(a->A, values));
2370   A->factorerrortype             = a->A->factorerrortype;
2371   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2372   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2373   PetscFunctionReturn(PETSC_SUCCESS);
2374 }
2375 
2376 static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a)
2377 {
2378   Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
2379   Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ *)maij->A->data;
2380 
2381   PetscFunctionBegin;
2382   if (!Y->preallocated) {
2383     PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
2384   } else if (!aij->nz) {
2385     PetscInt nonew = aij->nonew;
2386     PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
2387     aij->nonew = nonew;
2388   }
2389   PetscCall(MatShift_Basic(Y, a));
2390   PetscFunctionReturn(PETSC_SUCCESS);
2391 }
2392 
2393 static PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d)
2394 {
2395   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2396 
2397   PetscFunctionBegin;
2398   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2399   PetscCall(MatMissingDiagonal(a->A, missing, d));
2400   if (d) {
2401     PetscInt rstart;
2402     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2403     *d += rstart / A->rmap->bs;
2404   }
2405   PetscFunctionReturn(PETSC_SUCCESS);
2406 }
2407 
2408 static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a)
2409 {
2410   PetscFunctionBegin;
2411   *a = ((Mat_MPIBAIJ *)A->data)->A;
2412   PetscFunctionReturn(PETSC_SUCCESS);
2413 }
2414 
2415 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep)
2416 {
2417   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2418 
2419   PetscFunctionBegin;
2420   PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2421   PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2422   PetscFunctionReturn(PETSC_SUCCESS);
2423 }
2424 
/* Operation table for MATMPIBAIJ. Entries are POSITIONAL: each slot corresponds to a
   field of struct _MatOps, the numbered comments mark every fifth slot, and NULL means
   the operation is not implemented for this type. Do not reorder entries. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
                                       MatGetRow_MPIBAIJ,
                                       MatRestoreRow_MPIBAIJ,
                                       MatMult_MPIBAIJ,
                                       /* 4*/ MatMultAdd_MPIBAIJ,
                                       MatMultTranspose_MPIBAIJ,
                                       MatMultTransposeAdd_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIBAIJ,
                                       MatTranspose_MPIBAIJ,
                                       /*15*/ MatGetInfo_MPIBAIJ,
                                       MatEqual_MPIBAIJ,
                                       MatGetDiagonal_MPIBAIJ,
                                       MatDiagonalScale_MPIBAIJ,
                                       MatNorm_MPIBAIJ,
                                       /*20*/ MatAssemblyBegin_MPIBAIJ,
                                       MatAssemblyEnd_MPIBAIJ,
                                       MatSetOption_MPIBAIJ,
                                       MatZeroEntries_MPIBAIJ,
                                       /*24*/ MatZeroRows_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIBAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIBAIJ,
                                       MatCreateSubMatrices_MPIBAIJ,
                                       MatIncreaseOverlap_MPIBAIJ,
                                       MatGetValues_MPIBAIJ,
                                       MatCopy_MPIBAIJ,
                                       /*44*/ NULL,
                                       MatScale_MPIBAIJ,
                                       MatShift_MPIBAIJ,
                                       NULL,
                                       MatZeroRowsColumns_MPIBAIJ,
                                       /*49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIBAIJ,
                                       MatPermute_MPIBAIJ,
                                       MatSetValuesBlocked_MPIBAIJ,
                                       /*59*/ MatCreateSubMatrix_MPIBAIJ,
                                       MatDestroy_MPIBAIJ,
                                       MatView_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*74*/ NULL,
                                       MatFDColoringApply_BAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPIBAIJ,
                                       /*84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*99*/ NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIBAIJ,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIBAIJ,
                                       MatImaginaryPart_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMissingDiagonal_MPIBAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ,
                                       NULL,
                                       MatGetGhosts_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIBAIJ,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIBAIJ,
                                       MatInvertBlockDiagonal_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_Default,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       NULL,
                                       /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIBAIJ,
                                       NULL};
2578 
2579 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
2580 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
2581 
/*
  MatMPIBAIJSetPreallocationCSR_MPIBAIJ - Type-specific implementation of
  MatMPIBAIJSetPreallocationCSR(): preallocates B from a local block-CSR structure
  (ii, jj in block indices) and, when V is provided, inserts the numerical values.

  Two passes over the rows: the first counts, per block row, how many block columns
  fall inside [cstart,cend) (diagonal part) versus outside (off-diagonal part) to
  build exact preallocation; the second inserts the blocks with
  MatSetValuesBlocked_MPIBAIJ().
*/
static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
{
  PetscInt        m, rstart, cstart, cend;
  PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
  const PetscInt *JJ          = NULL;
  PetscScalar    *values      = NULL;
  PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
  PetscBool       nooffprocentries;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
  PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
  /* all sizes below are in blocks, not scalar rows/columns */
  m      = B->rmap->n / bs;
  rstart = B->rmap->rstart / bs;
  cstart = B->cmap->rstart / bs;
  cend   = B->cmap->rend / bs;

  PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
  PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
  /* pass 1: split each row's count into diagonal-part and off-diagonal-part blocks */
  for (i = 0; i < m; i++) {
    nz = ii[i + 1] - ii[i];
    PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
    nz_max = PetscMax(nz_max, nz); /* longest row, used to size the zero buffer when V == NULL */
    dlen   = 0;
    olen   = 0;
    JJ     = jj + ii[i];
    for (j = 0; j < nz; j++) {
      if (*JJ < cstart || *JJ >= cend) olen++;
      else dlen++;
      JJ++;
    }
    d_nnz[i] = dlen;
    o_nnz[i] = olen;
  }
  PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  /* pass 2: insert values; with no V, insert zeros so the nonzero pattern is set */
  values = (PetscScalar *)V;
  if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
  for (i = 0; i < m; i++) {
    PetscInt        row   = i + rstart;
    PetscInt        ncols = ii[i + 1] - ii[i];
    const PetscInt *icols = jj + ii[i];
    if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
      PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
    } else { /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j = 0; j < ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
        PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
      }
    }
  }

  if (!V) PetscCall(PetscFree(values));
  /* a local CSR describes only local rows, so skip the off-process stash exchange */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* the CSR fixes the exact nonzero pattern; error on any new nonzero location */
  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2650 
2651 /*@C
2652   MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values
2653 
2654   Collective
2655 
2656   Input Parameters:
2657 + B  - the matrix
2658 . bs - the block size
2659 . i  - the indices into `j` for the start of each local row (starts with zero)
2660 . j  - the column indices for each local row (starts with zero) these must be sorted for each row
2661 - v  - optional values in the matrix, use `NULL` if not provided
2662 
2663   Level: advanced
2664 
2665   Notes:
2666   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
2667   thus you CANNOT change the matrix entries by changing the values of `v` after you have
2668   called this routine.
2669 
2670   The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
2671   may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
2672   over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2673   `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2674   block column and the second index is over columns within a block.
2675 
2676   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2677 
2678 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ`
2679 @*/
/* Public entry point: validates arguments, then dispatches to the implementation
   composed on the matrix under "MatMPIBAIJSetPreallocationCSR_C" (a no-op for
   matrix types that do not provide one). See the manual page above. */
PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2689 
2690 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
2691 {
2692   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
2693   PetscInt     i;
2694   PetscMPIInt  size;
2695 
2696   PetscFunctionBegin;
2697   if (B->hash_active) {
2698     B->ops[0]      = b->cops;
2699     B->hash_active = PETSC_FALSE;
2700   }
2701   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
2702   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
2703   PetscCall(PetscLayoutSetUp(B->rmap));
2704   PetscCall(PetscLayoutSetUp(B->cmap));
2705   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2706 
2707   if (d_nnz) {
2708     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
2709   }
2710   if (o_nnz) {
2711     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
2712   }
2713 
2714   b->bs2 = bs * bs;
2715   b->mbs = B->rmap->n / bs;
2716   b->nbs = B->cmap->n / bs;
2717   b->Mbs = B->rmap->N / bs;
2718   b->Nbs = B->cmap->N / bs;
2719 
2720   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
2721   b->rstartbs = B->rmap->rstart / bs;
2722   b->rendbs   = B->rmap->rend / bs;
2723   b->cstartbs = B->cmap->rstart / bs;
2724   b->cendbs   = B->cmap->rend / bs;
2725 
2726 #if defined(PETSC_USE_CTABLE)
2727   PetscCall(PetscHMapIDestroy(&b->colmap));
2728 #else
2729   PetscCall(PetscFree(b->colmap));
2730 #endif
2731   PetscCall(PetscFree(b->garray));
2732   PetscCall(VecDestroy(&b->lvec));
2733   PetscCall(VecScatterDestroy(&b->Mvctx));
2734 
2735   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2736 
2737   MatSeqXAIJGetOptions_Private(b->B);
2738   PetscCall(MatDestroy(&b->B));
2739   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2740   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2741   PetscCall(MatSetType(b->B, MATSEQBAIJ));
2742   MatSeqXAIJRestoreOptions_Private(b->B);
2743 
2744   MatSeqXAIJGetOptions_Private(b->A);
2745   PetscCall(MatDestroy(&b->A));
2746   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2747   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2748   PetscCall(MatSetType(b->A, MATSEQBAIJ));
2749   MatSeqXAIJRestoreOptions_Private(b->A);
2750 
2751   PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
2752   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
2753   B->preallocated  = PETSC_TRUE;
2754   B->was_assembled = PETSC_FALSE;
2755   B->assembled     = PETSC_FALSE;
2756   PetscFunctionReturn(PETSC_SUCCESS);
2757 }
2758 
2759 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec);
2760 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal);
2761 
/*
  MatConvert_MPIBAIJ_MPIAdj - Builds a MATMPIADJ adjacency graph (CSR, no values)
  from the block nonzero structure of a MATMPIBAIJ matrix, with the diagonal
  (block) entries removed. All indices below are block indices.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
  Mat_SeqBAIJ    *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data;
  PetscInt        M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  /* pass 1: row pointers = diagonal-part + off-diagonal-part counts, minus one if
     block row i contains its own diagonal block */
  PetscCall(PetscMalloc1(M + 1, &ii));
  ii[0] = 0;
  for (i = 0; i < M; i++) {
    PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]);
    PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]);
    ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i];
    /* remove one from count of matrix has diagonal */
    for (j = id[i]; j < id[i + 1]; j++) {
      if (jd[j] == i) {
        ii[i + 1]--;
        break;
      }
    }
  }
  /* pass 2: emit each row's columns in ascending global order: off-diagonal columns
     below the owned range, then local columns (skipping the diagonal) shifted by
     rstart, then the remaining off-diagonal columns.
     NOTE(review): this relies on garray never containing a block column in the owned
     range (those live in the diagonal part) and on garray/jd being sorted — confirm */
  PetscCall(PetscMalloc1(ii[M], &jj));
  cnt = 0;
  for (i = 0; i < M; i++) {
    for (j = io[i]; j < io[i + 1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
    }
    for (k = id[i]; k < id[i + 1]; k++) {
      if (jd[k] != i) jj[cnt++] = rstart + jd[k];
    }
    for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]];
  }
  /* MatCreateMPIAdj takes ownership of ii and jj (values argument is NULL) */
  PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2799 
2800 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2801 
2802 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
2803 
/*
  MatConvert_MPIBAIJ_MPIAIJ - Converts a MATMPIBAIJ matrix to MATMPIAIJ by
  converting the two local SeqBAIJ blocks to SeqAIJ and, for a fresh conversion,
  renumbering the off-diagonal block's column indices from packed local (block)
  form to global scalar indices via garray.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  Mat_MPIAIJ  *b;
  Mat          B;

  PetscFunctionBegin;
  PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat; /* reuse the caller's previously converted matrix */
  } else {
    /* create an empty MPIAIJ with matching layout; preallocation is minimal because
       the local blocks are replaced wholesale below */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetType(B, MATMPIAIJ));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
    PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
  }
  b = (Mat_MPIAIJ *)B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    /* structures already exist: convert the local blocks in place */
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
  } else {
    PetscInt   *garray = a->garray;
    Mat_SeqAIJ *bB;
    PetscInt    bs, nnz;
    PetscCall(MatDestroy(&b->A));
    PetscCall(MatDestroy(&b->B));
    /* just clear out the data structure */
    PetscCall(MatDisAssemble_MPIAIJ(B));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));

    /* Global numbering for b->B columns */
    bB  = (Mat_SeqAIJ *)b->B->data;
    bs  = A->rmap->bs;
    nnz = bB->i[A->rmap->n];
    for (PetscInt k = 0; k < nnz; k++) {
      /* packed column index = local block index * bs + offset within the block;
         map the block through garray to recover the global scalar column */
      PetscInt bj = bB->j[k] / bs;
      PetscInt br = bB->j[k] % bs;
      bB->j[k]    = garray[bj] * bs + br;
    }
  }
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    /* replace A's internals with B's, destroying the old BAIJ data */
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2859 
2860 /*MC
2861    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2862 
2863    Options Database Keys:
2864 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
2865 . -mat_block_size <bs> - set the blocksize used to store the matrix
2866 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use  (0 often indicates using BLAS)
2867 - -mat_use_hash_table <fact> - set hash table factor
2868 
2869    Level: beginner
2870 
2871    Note:
    `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
2874 
2875 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ`
2876 M*/
2877 
2878 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);
2879 
2880 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2881 {
2882   Mat_MPIBAIJ *b;
2883   PetscBool    flg = PETSC_FALSE;
2884 
2885   PetscFunctionBegin;
2886   PetscCall(PetscNew(&b));
2887   B->data      = (void *)b;
2888   B->ops[0]    = MatOps_Values;
2889   B->assembled = PETSC_FALSE;
2890 
2891   B->insertmode = NOT_SET_VALUES;
2892   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
2893   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2894 
2895   /* build local table of row and column ownerships */
2896   PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));
2897 
2898   /* build cache for off array entries formed */
2899   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
2900 
2901   b->donotstash  = PETSC_FALSE;
2902   b->colmap      = NULL;
2903   b->garray      = NULL;
2904   b->roworiented = PETSC_TRUE;
2905 
2906   /* stuff used in block assembly */
2907   b->barray = NULL;
2908 
2909   /* stuff used for matrix vector multiply */
2910   b->lvec  = NULL;
2911   b->Mvctx = NULL;
2912 
2913   /* stuff for MatGetRow() */
2914   b->rowindices   = NULL;
2915   b->rowvalues    = NULL;
2916   b->getrowactive = PETSC_FALSE;
2917 
2918   /* hash table stuff */
2919   b->ht           = NULL;
2920   b->hd           = NULL;
2921   b->ht_size      = 0;
2922   b->ht_flag      = PETSC_FALSE;
2923   b->ht_fact      = 0;
2924   b->ht_total_ct  = 0;
2925   b->ht_insert_ct = 0;
2926 
2927   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2928   b->ijonly = PETSC_FALSE;
2929 
2930   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
2931   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
2932   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
2933 #if defined(PETSC_HAVE_HYPRE)
2934   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
2935 #endif
2936   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
2937   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
2938   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
2939   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
2940   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
2941   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
2942   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
2943   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));
2944 
2945   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
2946   PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
2947   if (flg) {
2948     PetscReal fact = 1.39;
2949     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
2950     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
2951     if (fact <= 1.0) fact = 1.39;
2952     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
2953     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
2954   }
2955   PetscOptionsEnd();
2956   PetscFunctionReturn(PETSC_SUCCESS);
2957 }
2958 
2959 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2960 /*MC
2961    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2962 
2963    This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
2964    and `MATMPIBAIJ` otherwise.
2965 
2966    Options Database Keys:
2967 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`
2968 
2969   Level: beginner
2970 
2971 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
2972 M*/
2973 
2974 /*@C
2975   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
2976   (block compressed row).
2977 
2978   Collective
2979 
2980   Input Parameters:
2981 + B     - the matrix
2982 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2983           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2984 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2985            submatrix  (same for all local rows)
. d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or `NULL`.  If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
2990 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2991            submatrix (same for all local rows).
2992 - o_nnz - array containing the number of nonzeros in the various block rows of the
2993            off-diagonal portion of the local submatrix (possibly different for
2994            each block row) or `NULL`.
2995 
2996    If the *_nnz parameter is given then the *_nz parameter is ignored
2997 
2998   Options Database Keys:
2999 + -mat_block_size            - size of the blocks to use
3000 - -mat_use_hash_table <fact> - set hash table factor
3001 
3002   Level: intermediate
3003 
3004   Notes:
3005   For good matrix assembly performance
3006   the user should preallocate the matrix storage by setting the parameters
3007   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3008   performance can be increased by more than a factor of 50.
3009 
  If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
  then it must be used on all processors that share the object for that argument.
3012 
3013   Storage Information:
3014   For a square global matrix we define each processor's diagonal portion
3015   to be its local rows and the corresponding columns (a square submatrix);
3016   each processor's off-diagonal portion encompasses the remainder of the
3017   local matrix (a rectangular submatrix).
3018 
3019   The user can specify preallocated storage for the diagonal part of
3020   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
3021   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3022   memory allocation.  Likewise, specify preallocated storage for the
3023   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3024 
3025   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3026   the figure below we depict these three local rows and all columns (0-11).
3027 
3028 .vb
3029            0 1 2 3 4 5 6 7 8 9 10 11
3030           --------------------------
3031    row 3  |o o o d d d o o o o  o  o
3032    row 4  |o o o d d d o o o o  o  o
3033    row 5  |o o o d d d o o o o  o  o
3034           --------------------------
3035 .ve
3036 
3037   Thus, any entries in the d locations are stored in the d (diagonal)
3038   submatrix, and any entries in the o locations are stored in the
3039   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3040   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3041 
3042   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3043   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3044   In general, for PDE problems in which most nonzeros are near the diagonal,
3045   one expects `d_nz` >> `o_nz`.
3046 
3047   You can call `MatGetInfo()` to get information on how effective the preallocation was;
3048   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3049   You can also run with the option `-info` and look for messages with the string
3050   malloc in them to see if additional memory allocation was needed.
3051 
3052 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
3053 @*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2); /* bs must agree on all processes sharing the matrix */
  /* Dispatch to the type-specific implementation if one is composed on B;
     for matrix types without "MatMPIBAIJSetPreallocation_C" this is a no-op */
  PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3063 
3064 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
3065 /*@C
3066   MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
3067   (block compressed row).
3068 
3069   Collective
3070 
3071   Input Parameters:
3072 + comm  - MPI communicator
3073 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3074           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3075 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
3076            This value should be the same as the local size used in creating the
3077            y vector for the matrix-vector product y = Ax.
3078 . n     - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
3079            This value should be the same as the local size used in creating the
3080            x vector for the matrix-vector product y = Ax.
3081 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3082 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3083 . d_nz  - number of nonzero blocks per block row in diagonal portion of local
3084            submatrix  (same for all local rows)
. d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
3089 . o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3090            submatrix (same for all local rows).
3091 - o_nnz - array containing the number of nonzero blocks in the various block rows of the
3092            off-diagonal portion of the local submatrix (possibly different for
3093            each block row) or NULL.
3094 
3095   Output Parameter:
3096 . A - the matrix
3097 
3098   Options Database Keys:
3099 + -mat_block_size            - size of the blocks to use
3100 - -mat_use_hash_table <fact> - set hash table factor
3101 
3102   Level: intermediate
3103 
3104   Notes:
3105   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3106   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3107   [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
3108 
3109   For good matrix assembly performance
3110   the user should preallocate the matrix storage by setting the parameters
3111   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3112   performance can be increased by more than a factor of 50.
3113 
3114   If the *_nnz parameter is given then the *_nz parameter is ignored
3115 
  A nonzero block is any block that has 1 or more nonzeros in it
3117 
3118   The user MUST specify either the local or global matrix dimensions
3119   (possibly both).
3120 
  If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
  then it must be used on all processors that share the object for that argument.
3123 
3124   Storage Information:
3125   For a square global matrix we define each processor's diagonal portion
3126   to be its local rows and the corresponding columns (a square submatrix);
3127   each processor's off-diagonal portion encompasses the remainder of the
3128   local matrix (a rectangular submatrix).
3129 
3130   The user can specify preallocated storage for the diagonal part of
3131   the local submatrix with either d_nz or d_nnz (not both).  Set
3132   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3133   memory allocation.  Likewise, specify preallocated storage for the
3134   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3135 
3136   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3137   the figure below we depict these three local rows and all columns (0-11).
3138 
3139 .vb
3140            0 1 2 3 4 5 6 7 8 9 10 11
3141           --------------------------
3142    row 3  |o o o d d d o o o o  o  o
3143    row 4  |o o o d d d o o o o  o  o
3144    row 5  |o o o d d d o o o o  o  o
3145           --------------------------
3146 .ve
3147 
3148   Thus, any entries in the d locations are stored in the d (diagonal)
3149   submatrix, and any entries in the o locations are stored in the
3150   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3151   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3152 
3153   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3154   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3155   In general, for PDE problems in which most nonzeros are near the diagonal,
3156   one expects `d_nz` >> `o_nz`.
3157 
3158 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
3159 @*/
3160 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
3161 {
3162   PetscMPIInt size;
3163 
3164   PetscFunctionBegin;
3165   PetscCall(MatCreate(comm, A));
3166   PetscCall(MatSetSizes(*A, m, n, M, N));
3167   PetscCallMPI(MPI_Comm_size(comm, &size));
3168   if (size > 1) {
3169     PetscCall(MatSetType(*A, MATMPIBAIJ));
3170     PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
3171   } else {
3172     PetscCall(MatSetType(*A, MATSEQBAIJ));
3173     PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
3174   }
3175   PetscFunctionReturn(PETSC_SUCCESS);
3176 }
3177 
/* Duplicate an MPIBAIJ matrix: replicate layouts and all implementation state,
   then duplicate the diagonal (A) and off-diagonal (B) sequential blocks,
   copying values only when cpvalues requests it. */
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat          mat;
  Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
  PetscInt     len = 0;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));

  /* share the row/column layouts by reference rather than rebuilding them */
  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
  if (matin->hash_active) {
    /* source is still in hash-based (unassembled) mode; just set up the copy */
    PetscCall(MatSetUp(mat));
  } else {
    mat->factortype   = matin->factortype;
    mat->preallocated = PETSC_TRUE;
    mat->assembled    = PETSC_TRUE;
    mat->insertmode   = NOT_SET_VALUES;

    /* copy block-size bookkeeping (bs2 = bs*bs; Mbs/Nbs global, mbs/nbs local block counts) */
    a             = (Mat_MPIBAIJ *)mat->data;
    mat->rmap->bs = matin->rmap->bs;
    a->bs2        = oldmat->bs2;
    a->mbs        = oldmat->mbs;
    a->nbs        = oldmat->nbs;
    a->Mbs        = oldmat->Mbs;
    a->Nbs        = oldmat->Nbs;

    /* per-call scratch state is reset rather than copied */
    a->size         = oldmat->size;
    a->rank         = oldmat->rank;
    a->donotstash   = oldmat->donotstash;
    a->roworiented  = oldmat->roworiented;
    a->rowindices   = NULL;
    a->rowvalues    = NULL;
    a->getrowactive = PETSC_FALSE;
    a->barray       = NULL;
    a->rstartbs     = oldmat->rstartbs;
    a->rendbs       = oldmat->rendbs;
    a->cstartbs     = oldmat->cstartbs;
    a->cendbs       = oldmat->cendbs;

    /* hash table stuff: keep the configuration flags but drop the table itself */
    a->ht           = NULL;
    a->hd           = NULL;
    a->ht_size      = 0;
    a->ht_flag      = oldmat->ht_flag;
    a->ht_fact      = oldmat->ht_fact;
    a->ht_total_ct  = 0;
    a->ht_insert_ct = 0;

    PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
    /* colmap: global block column -> local column in B (hash map or dense array) */
    if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
      PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
      PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
      PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
#endif
    } else a->colmap = NULL;

    /* garray: global block-column indices of B's columns; length is B's local block column count */
    if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
      PetscCall(PetscMalloc1(len, &a->garray));
      PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
    } else a->garray = NULL;

    PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
    PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
    PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));

    PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
    PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  }
  /* carry over composed functions (e.g. conversion routines) to the duplicate */
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
3256 
3257 /* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
  PetscInt    *rowidxs, *colidxs, rs, cs, ce;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header: classid, global rows, global columns, total nonzeros */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  /* negative nz presumably marks a special (e.g. dense) on-disk format — not loadable here */
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set local sizes if not set already (square matrices may mirror the other dimension) */
  if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
  if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetLocalSize(mat, &m, &n));
  PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
  mbs = m / bs; /* local number of block rows */
  nbs = n / bs; /* local number of block columns */

  /* read in row lengths and build row indices (prefix-sum into CSR row pointers) */
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  /* sanity check: sum of local row lengths across all ranks must equal the header's nz */
  PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));

  {                /* preallocate matrix storage */
    PetscBT    bt; /* helper bit set to count diagonal nonzeros */
    PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
    PetscBool  sbaij, done;
    PetscInt  *d_nnz, *o_nnz;

    PetscCall(PetscBTCreate(nbs, &bt));
    PetscCall(PetscHSetICreate(&ht));
    PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
    PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
    /* count distinct nonzero blocks per block row (bit set for diagonal, hash set for off-diagonal) */
    for (i = 0; i < mbs; i++) {
      PetscCall(PetscBTMemzero(nbs, bt));
      PetscCall(PetscHSetIClear(ht));
      for (k = 0; k < bs; k++) {
        PetscInt row = bs * i + k;
        for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
          PetscInt col = colidxs[j];
          /* for SBAIJ only the upper triangle (col >= row) is stored */
          if (!sbaij || col >= row) {
            if (col >= cs && col < ce) {
              if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
            } else {
              PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
              if (done) o_nnz[i]++;
            }
          }
        }
      }
    }
    PetscCall(PetscBTDestroy(&bt));
    PetscCall(PetscHSetIDestroy(&ht));
    /* one of these matches the actual type; the other is presumably a no-op (PetscTryMethod dispatch) */
    PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree2(d_nnz, o_nnz));
  }

  /* store matrix values, one (scalar) local row at a time */
  for (i = 0; i < m; i++) {
    PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
    PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
  }

  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3358 
3359 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
3360 {
3361   PetscBool isbinary;
3362 
3363   PetscFunctionBegin;
3364   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3365   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3366   PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
3367   PetscFunctionReturn(PETSC_SUCCESS);
3368 }
3369 
3370 /*@
  MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrix's hash table
3372 
3373   Input Parameters:
3374 + mat  - the matrix
3375 - fact - factor
3376 
3377   Options Database Key:
3378 . -mat_use_hash_table <fact> - provide the factor
3379 
3380   Level: advanced
3381 
3382 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()`
3383 @*/
3384 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact)
3385 {
3386   PetscFunctionBegin;
3387   PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
3388   PetscFunctionReturn(PETSC_SUCCESS);
3389 }
3390 
3391 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact)
3392 {
3393   Mat_MPIBAIJ *baij;
3394 
3395   PetscFunctionBegin;
3396   baij          = (Mat_MPIBAIJ *)mat->data;
3397   baij->ht_fact = fact;
3398   PetscFunctionReturn(PETSC_SUCCESS);
3399 }
3400 
3401 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
3402 {
3403   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3404   PetscBool    flg;
3405 
3406   PetscFunctionBegin;
3407   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
3408   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
3409   if (Ad) *Ad = a->A;
3410   if (Ao) *Ao = a->B;
3411   if (colmap) *colmap = a->garray;
3412   PetscFunctionReturn(PETSC_SUCCESS);
3413 }
3414 
3415 /*
3416     Special version for direct calls from Fortran (to eliminate two function call overheads
3417 */
3418 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3419   #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3420 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3421   #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3422 #endif
3423 
3424 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name
3425 /*@C
3426   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`
3427 
3428   Collective
3429 
3430   Input Parameters:
3431 + matin  - the matrix
3432 . min    - number of input rows
3433 . im     - input rows
3434 . nin    - number of input columns
3435 . in     - input columns
3436 . v      - numerical values input
3437 - addvin - `INSERT_VALUES` or `ADD_VALUES`
3438 
3439   Level: advanced
3440 
3441   Developer Notes:
3442   This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.
3443 
3444 .seealso: `Mat`, `MatSetValuesBlocked()`
3445 @*/
PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
{
  /* convert input arguments to C version (Fortran passes everything by reference) */
  Mat        mat = *matin;
  PetscInt   m = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray;      /* bs*bs scratch block */
  PetscBool        roworiented = baij->roworiented;
  PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));

  /* lazily allocate the scratch block used to repack one bs x bs block */
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* distance (in scalars) between consecutive rows/columns of a block inside v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative block-row indices are silently skipped */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* block row owned by this process */
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          /* gather the strided bs x bs block into contiguous scratch storage */
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2; /* rewind to the start of the scratch block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* block column lands in the diagonal submatrix */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          /* off-diagonal submatrix: translate global block column via colmap */
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
  #endif
#endif
            /* colmap stores 1-based scalar offsets; convert to a 0-based block index */
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = (col - 1) / bs;
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            /* unknown column in an extensible matrix: fall back to disassembled form */
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      /* block row owned elsewhere: stash for communication at assembly time */
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3553 
3554 /*@
3555   MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block CSR format for the local rows.
3556 
3557   Collective
3558 
3559   Input Parameters:
3560 + comm - MPI communicator
3561 . bs   - the block size, only a block size of 1 is supported
3562 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
3563 . n    - This value should be the same as the local size used in creating the
3564          x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have
3565          calculated if `N` is given) For square matrices `n` is almost always `m`.
3566 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
3567 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
3568 . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
3569 . j    - column indices
3570 - a    - matrix values
3571 
3572   Output Parameter:
3573 . mat - the matrix
3574 
3575   Level: intermediate
3576 
3577   Notes:
3578   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
3579   thus you CANNOT change the matrix entries by changing the values of a[] after you have
3580   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3581 
3582   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3583   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3584   block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3585   with column-major ordering within blocks.
3586 
3587   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
3588 
3589 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
3590           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
3591 @*/
PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  /* CSR row offsets are 0-based, so the first offset must be exactly 0 */
  PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  /* the local row count fixes this rank's ownership range; PETSc cannot choose it here */
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIBAIJ));
  PetscCall(MatSetBlockSize(*mat, bs));
  PetscCall(MatSetUp(*mat));
  /* the caller's values are column-major within each bs x bs block (see the manual page Notes),
     so insert in column-oriented mode while the CSR data is ingested ... */
  PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
  PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
  /* ... then restore the default row-oriented mode for any later MatSetValues() calls */
  PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3607 
3608 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3609 {
3610   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
3611   PetscInt    *indx;
3612   PetscScalar *values;
3613 
3614   PetscFunctionBegin;
3615   PetscCall(MatGetSize(inmat, &m, &N));
3616   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3617     Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data;
3618     PetscInt    *dnz, *onz, mbs, Nbs, nbs;
3619     PetscInt    *bindx, rmax = a->rmax, j;
3620     PetscMPIInt  rank, size;
3621 
3622     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3623     mbs = m / bs;
3624     Nbs = N / cbs;
3625     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
3626     nbs = n / cbs;
3627 
3628     PetscCall(PetscMalloc1(rmax, &bindx));
3629     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
3630 
3631     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3632     PetscCallMPI(MPI_Comm_rank(comm, &size));
3633     if (rank == size - 1) {
3634       /* Check sum(nbs) = Nbs */
3635       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
3636     }
3637 
3638     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
3639     for (i = 0; i < mbs; i++) {
3640       PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
3641       nnz = nnz / bs;
3642       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
3643       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
3644       PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL));
3645     }
3646     PetscCall(PetscFree(bindx));
3647 
3648     PetscCall(MatCreate(comm, outmat));
3649     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
3650     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
3651     PetscCall(MatSetType(*outmat, MATBAIJ));
3652     PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz));
3653     PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
3654     MatPreallocateEnd(dnz, onz);
3655     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3656   }
3657 
3658   /* numeric phase */
3659   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3660   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
3661 
3662   for (i = 0; i < m; i++) {
3663     PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3664     Ii = i + rstart;
3665     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
3666     PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3667   }
3668   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
3669   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
3670   PetscFunctionReturn(PETSC_SUCCESS);
3671 }
3672