#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I  "petscmat.h"  I*/

#include <petsc/private/hashseti.h>
#include <petscblaslapack.h>
#include <petscsf.h>

static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree2(baij->hd, baij->ht));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE BAIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE

#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
#endif

static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
  PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
  PetscScalar       *va, *vv;
  Vec                vB, vA;
  const PetscScalar *vb;

  PetscFunctionBegin;
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
  PetscCall(MatGetRowMaxAbs(a->A, vA, idx));

  PetscCall(VecGetArrayWrite(vA, &va));
  if (idx) {
    for (i = 0; i < m; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
  PetscCall(PetscMalloc1(m, &idxb));
  PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));

  PetscCall(VecGetArrayWrite(v, &vv));
  PetscCall(VecGetArrayRead(vB, &vb));
  for (i = 0; i < m; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      vv[i] = vb[i];
      if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
    } else {
      vv[i] = va[i];
      if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
    }
  }
  PetscCall(VecRestoreArrayWrite(v, &vv));
  PetscCall(VecRestoreArrayWrite(vA, &va));
  PetscCall(VecRestoreArrayRead(vB, &vb));
  PetscCall(PetscFree(idxb));
  PetscCall(VecDestroy(&vA));
  PetscCall(VecDestroy(&vB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

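/*
   Usage sketch (an assumption for illustration, not part of this file): after
   MatGetRowMaxAbs() each local entry v[i] holds max_j |A(i,j)| and, when
   requested, idx[i] the global column where that maximum occurs.
*/
#if 0
static PetscErrorCode ExampleRowMaxAbs(Mat A)
{
  Vec       v;
  PetscInt *idx, m;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, NULL));
  PetscCall(MatCreateVecs(A, NULL, &v)); /* left vector: one entry per local row */
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(MatGetRowMaxAbs(A, v, idx));
  PetscCall(PetscFree(idx));
  PetscCall(VecDestroy(&v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif
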
static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way, since the
   length of colmap equals the global number of block columns.
*/
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     nbs = B->nbs, i, bs = mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap));
  for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1));
#else
  PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
  for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

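/*
   Lookup sketch (an assumption for illustration): colmap values are stored
   shifted by one so that 0 -- the PetscCalloc1()/hash-miss default -- means
   "this global block column has no local storage in B".  The helper below is
   hypothetical, not part of PETSc.
*/
#if 0
static PetscErrorCode ExampleColmapLookup(Mat mat, PetscInt gbcol, PetscInt *lcol)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  #if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIGetWithDefault(baij->colmap, gbcol + 1, 0, lcol));
  #else
  *lcol = baij->colmap[gbcol];
  #endif
  *lcol = *lcol - 1; /* -1 signals "not present", otherwise the first local column of the block */
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif
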
#define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = PetscSafePointerPlusOffset(aj, ai[brow]); \
    ap   = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  } while (0)

#define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = PetscSafePointerPlusOffset(bj, bi[brow]); \
    ap   = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  } while (0)

PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  MatScalar    value;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)(A)->data;
  PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar   *aa = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j = 0; j < n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)(B)->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else {
              PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
              col += in[j] % bs;
            }
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

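/*
   Caller-side sketch (an assumption, not from this file): point entries are
   inserted with global indices; rows owned by this process go directly into
   the A/B blocks above, while off-process rows wait in mat->stash until
   assembly.
*/
#if 0
static PetscErrorCode ExamplePointInsert(Mat mat)
{
  PetscInt    row     = 0, cols[2] = {0, 1};
  PetscScalar vals[2] = {2.0, -1.0};

  PetscFunctionBegin;
  PetscCall(MatSetValues(mat, 1, &row, 2, cols, vals, INSERT_VALUES));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif
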
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
    by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
*/
static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ *)mat->data;
  const PetscScalar *value;
  MatScalar         *barray      = baij->barray;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i * (stepval + bs) + j) * bs;
          } else {
            value = v + (j * (stepval + bs) + i) * bs;
          }
          for (ii = 0; ii < bs; ii++, value += bs + stepval) {
            for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = col < 1 ? -1 : (col - 1) / bs;
#else
            col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

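/*
   Caller-side sketch (an assumption): one bs x bs block inserted with global
   *block* indices.  For a row-oriented caller the dense block is laid out row
   by row, which is what the stepval copy above unpacks; bs = 2 is assumed here.
*/
#if 0
static PetscErrorCode ExampleBlockInsert(Mat mat)
{
  PetscInt    brow     = 0, bcol = 1;
  PetscScalar block[4] = {1.0, 2.0,  /* first row of the 2x2 block */
                          3.0, 4.0}; /* second row of the 2x2 block */

  PetscFunctionBegin;
  PetscCall(MatSetValuesBlocked(mat, 1, &brow, 1, &bcol, block, ADD_VALUES));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif
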
#define HASH_KEY             0.6180339887
#define HASH(size, key, tmp) (tmp = (key) * HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
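/*
   HASH() is multiplicative hashing with the golden-ratio constant:
   h(key) = floor(size * frac(key * 0.6180339887)).  Worked example (numbers
   chosen for illustration only): size = 11, key = 7 gives tmp = 4.3262...,
   frac = 0.3262..., floor(11 * 0.3262...) = 3, so the key hashes to slot 3.
*/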
static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
  PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
  PetscReal    tmp;
  MatScalar  **HD       = baij->hd, value;
  PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j = 0; j < n; j++) {
        col = in[j];
        if (roworiented) value = v[i * n + j];
        else value = v[i + j * m];
        /* Look up into the Hash Table */
        key = (row / bs) * Nbs + (col / bs) + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
        else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct  = total_ct; /* the counters were initialized from the running totals above, so assign rather than accumulate twice */
    baij->ht_insert_ct = insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col;
  PetscInt           rstart = baij->rstartbs;
  PetscInt           rend = baij->rendbs, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2; /* block-row ownership end, matching the block row indices below */
  PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
  PetscReal          tmp;
  MatScalar        **HD = baij->hd, *baij_a;
  const PetscScalar *v_t, *value;
  PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
      PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    }
    row = im[i];
    v_t = v + i * nbs2;
    if (row >= rstart && row < rend) {
      for (j = 0; j < n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row * Nbs + col + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          total_ct++;
          insert_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t += bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
            }
          }
        } else {
          value = v + j * (stepval + bs) * bs + i * bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct  = total_ct; /* counters were initialized from the running totals above */
    baij->ht_insert_ct = insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

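/*
   Probe-sequence sketch (an assumption): the table lookups above are open
   addressing with linear probing, searching forward from the hashed slot and
   wrapping to the start of the table.  A standalone version of that search:
*/
#if 0
static PetscInt ExampleHTFind(const PetscInt HT[], PetscInt size, PetscInt h1, PetscInt key)
{
  PetscInt idx;

  for (idx = h1; idx < size; idx++)
    if (HT[idx] == key) return idx; /* probe forward from the hashed slot */
  for (idx = 0; idx < h1; idx++)
    if (HT[idx] == key) return idx; /* wrap around to the beginning */
  return -1;                        /* key not present */
}
#endif
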
static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
          data--;
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
  PetscReal    sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    PetscCall(MatNorm(baij->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v  = bmat->a;
      nz = bmat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = baij->garray, cstart = baij->rstartbs;
      PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N, &tmp2));
      v  = amat->a;
      jj = amat->j;
      for (i = 0; i < amat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * (cstart + *jj) + j; /* column index */
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      v  = bmat->a;
      jj = bmat->j;
      for (i = 0; i < bmat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * garray[*jj] + j;
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = 0.0;
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      PetscCall(PetscMalloc1(bs, &sums));
      sum = 0.0;
      for (j = 0; j < amat->mbs; j++) {
        for (row = 0; row < bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2 * amat->i[j];
        nz = amat->i[j + 1] - amat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        v  = bmat->a + bs2 * bmat->i[j];
        nz = bmat->i[j + 1] - bmat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        for (row = 0; row < bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscFree(sums));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

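/*
   Norm formulas evaluated above (standard definitions):
     NORM_FROBENIUS: ||A||_F = sqrt(sum_ij |a_ij|^2)  -- local sums of squares, MPI sum, then sqrt
     NORM_1:         max_j sum_i |a_ij|               -- global column sums via MPI sum, then max
     NORM_INFINITY:  max_i sum_j |a_ij|               -- local row sums, then MPI max
*/
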
/*
  Creates and populates the hash table.
  This table is created only once.
  If new entries need to be added to the matrix,
  the hash table has to be destroyed and
  recreated.
*/
static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          A = baij->A, B = baij->B;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
  PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
  PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
  PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
  PetscInt    *HT, key;
  MatScalar  **HD;
  PetscReal    tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct = 0, max = 0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS);

  baij->ht_size = (PetscInt)(factor * nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
  HD = baij->hd;
  HT = baij->ht;

  /* Loop Over A */
  for (i = 0; i < a->mbs; i++) {
    for (j = ai[i]; j < ai[i + 1]; j++) {
      row = i + rstart;
      col = aj[j] + cstart;

      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = a->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i = 0; i < b->mbs; i++) {
    for (j = bi[i]; j < bi[i + 1]; j++) {
      row = i + rstart;
      col = garray[bj[j]];
      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = b->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i = 0, j = 0; i < ht_size; i++) {
    if (HT[i]) j++;
  }
  PetscCall(PetscInfo(mat, "Average Search = %5.2g, max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max));
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     nstash, reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
  PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt    *row, *col;
  PetscBool    r1, r2, r3, other_disassembled;
  MatScalar   *val;
  PetscMPIInt  n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the row-oriented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented                           = PETSC_FALSE;
    a->roworiented                              = PETSC_FALSE;
    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE;
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented                           = r1;
    a->roworiented                              = r2;
    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3;
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

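/*
   Assembly-protocol sketch (an assumption, not from this file): entries
   destined for other ranks sit in mat->stash / mat->bstash until the
   Begin/End pair above delivers them to their owners and folds them into the
   local A and B parts.
*/
#if 0
static PetscErrorCode ExampleOffProcInsert(Mat mat, PetscInt offrow, PetscInt col, PetscScalar v)
{
  PetscFunctionBegin;
  PetscCall(MatSetValue(mat, offrow, col, v, ADD_VALUES)); /* stashed when offrow is off-process */
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif
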
extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer);
#include <petscdraw.h>
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ      *baij = (Mat_MPIBAIJ *)mat->data;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, (double)info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqBAIJ *Aloc;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)A->data)->A, matname));
      PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)A->data)->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
  Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt        header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l;
  PetscInt64      nz, hnz;
  PetscInt       *rowlens, *colidxs;
  PetscScalar    *matvals;
  PetscMPIInt     rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs * bs * (A->nz + B->nz);

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (cnt = 0, i = 0; i < A->mbs; i++)
    for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
    }
  }
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

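/*
   On-disk layout produced above (summary derived from the code): a 4-entry
   header (MAT_FILE_CLASSID, M, N, global nonzero count), then the M row
   lengths, then all column indices, then all nonzero values, each section
   written collectively with PetscViewerBinaryWriteAll().
*/
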
PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii || isdraw || issocket) {
    PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

1206 static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
1207 {
1208   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1209   PetscInt     nt;
1210 
1211   PetscFunctionBegin;
1212   PetscCall(VecGetLocalSize(xx, &nt));
1213   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
1214   PetscCall(VecGetLocalSize(yy, &nt));
1215   PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy");
1216   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1217   PetscCall((*a->A->ops->mult)(a->A, xx, yy));
1218   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1219   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
1220   PetscFunctionReturn(PETSC_SUCCESS);
1221 }
1222 
1223 static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1224 {
1225   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1226 
1227   PetscFunctionBegin;
1228   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1229   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1230   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1231   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1232   PetscFunctionReturn(PETSC_SUCCESS);
1233 }
1234 
1235 static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
1236 {
1237   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1238 
1239   PetscFunctionBegin;
1240   /* do nondiagonal part */
1241   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1242   /* do local part */
1243   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1244   /* add partial results together */
1245   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1246   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1247   PetscFunctionReturn(PETSC_SUCCESS);
1248 }
1249 
1250 static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1251 {
1252   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1253 
1254   PetscFunctionBegin;
1255   /* do nondiagonal part */
1256   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1257   /* do local part */
1258   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1259   /* add partial results together */
1260   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1261   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1262   PetscFunctionReturn(PETSC_SUCCESS);
1263 }
1264 
1265 /*
1266   This only works correctly for square matrices where the subblock A->A is the
1267    diagonal block
1268 */
1269 static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
1270 {
1271   PetscFunctionBegin;
1272   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1273   PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
1274   PetscFunctionReturn(PETSC_SUCCESS);
1275 }
1276 
1277 static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
1278 {
1279   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1280 
1281   PetscFunctionBegin;
1282   PetscCall(MatScale(a->A, aa));
1283   PetscCall(MatScale(a->B, aa));
1284   PetscFunctionReturn(PETSC_SUCCESS);
1285 }
1286 
1287 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1288 {
1289   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
1290   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1291   PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
1292   PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
1293   PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;
1294 
1295   PetscFunctionBegin;
1296   PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
1297   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1298   mat->getrowactive = PETSC_TRUE;
1299 
1300   if (!mat->rowvalues && (idx || v)) {
1301     /*
1302         allocate enough space to hold information from the longest row.
1303     */
1304     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
1305     PetscInt     max = 1, mbs = mat->mbs, tmp;
1306     for (i = 0; i < mbs; i++) {
1307       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1308       if (max < tmp) max = tmp;
1309     }
1310     PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
1311   }
1312   lrow = row - brstart;
1313 
1314   pvA = &vworkA;
1315   pcA = &cworkA;
1316   pvB = &vworkB;
1317   pcB = &cworkB;
1318   if (!v) {
1319     pvA = NULL;
1320     pvB = NULL;
1321   }
1322   if (!idx) {
1323     pcA = NULL;
1324     if (!v) pcB = NULL;
1325   }
1326   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1327   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1328   nztot = nzA + nzB;
1329 
1330   cmap = mat->garray;
1331   if (v || idx) {
1332     if (nztot) {
1333       /* Sort by increasing column numbers, assuming A and B already sorted */
1334       PetscInt imark = -1;
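      /* imark records how many B (off-diagonal) columns fall before the diagonal block, so the merged row is: B-left, then A, then B-right */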
1335       if (v) {
1336         *v = v_p = mat->rowvalues;
1337         for (i = 0; i < nzB; i++) {
1338           if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
1339           else break;
1340         }
1341         imark = i;
1342         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1343         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1344       }
1345       if (idx) {
1346         *idx = idx_p = mat->rowindices;
1347         if (imark > -1) {
1348           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1349         } else {
1350           for (i = 0; i < nzB; i++) {
1351             if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1352             else break;
1353           }
1354           imark = i;
1355         }
1356         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
1357         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1358       }
1359     } else {
1360       if (idx) *idx = NULL;
1361       if (v) *v = NULL;
1362     }
1363   }
1364   *nz = nztot;
1365   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1366   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1367   PetscFunctionReturn(PETSC_SUCCESS);
1368 }
1369 
1370 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1371 {
1372   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1373 
1374   PetscFunctionBegin;
1375   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
1376   baij->getrowactive = PETSC_FALSE;
1377   PetscFunctionReturn(PETSC_SUCCESS);
1378 }
1379 
1380 static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1381 {
1382   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1383 
1384   PetscFunctionBegin;
1385   PetscCall(MatZeroEntries(l->A));
1386   PetscCall(MatZeroEntries(l->B));
1387   PetscFunctionReturn(PETSC_SUCCESS);
1388 }
1389 
1390 static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1391 {
1392   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
1393   Mat            A = a->A, B = a->B;
1394   PetscLogDouble isend[5], irecv[5];
1395 
1396   PetscFunctionBegin;
1397   info->block_size = (PetscReal)matin->rmap->bs;
1398 
1399   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1400 
1401   isend[0] = info->nz_used;
1402   isend[1] = info->nz_allocated;
1403   isend[2] = info->nz_unneeded;
1404   isend[3] = info->memory;
1405   isend[4] = info->mallocs;
1406 
1407   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1408 
1409   isend[0] += info->nz_used;
1410   isend[1] += info->nz_allocated;
1411   isend[2] += info->nz_unneeded;
1412   isend[3] += info->memory;
1413   isend[4] += info->mallocs;
1414 
1415   if (flag == MAT_LOCAL) {
1416     info->nz_used      = isend[0];
1417     info->nz_allocated = isend[1];
1418     info->nz_unneeded  = isend[2];
1419     info->memory       = isend[3];
1420     info->mallocs      = isend[4];
1421   } else if (flag == MAT_GLOBAL_MAX) {
1422     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1423 
1424     info->nz_used      = irecv[0];
1425     info->nz_allocated = irecv[1];
1426     info->nz_unneeded  = irecv[2];
1427     info->memory       = irecv[3];
1428     info->mallocs      = irecv[4];
1429   } else if (flag == MAT_GLOBAL_SUM) {
1430     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1431 
1432     info->nz_used      = irecv[0];
1433     info->nz_allocated = irecv[1];
1434     info->nz_unneeded  = irecv[2];
1435     info->memory       = irecv[3];
1436     info->mallocs      = irecv[4];
1437   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1438   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1439   info->fill_ratio_needed = 0;
1440   info->factor_mallocs    = 0;
1441   PetscFunctionReturn(PETSC_SUCCESS);
1442 }
1443 
1444 static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
1445 {
1446   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1447 
1448   PetscFunctionBegin;
1449   switch (op) {
1450   case MAT_NEW_NONZERO_LOCATIONS:
1451   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1452   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1453   case MAT_KEEP_NONZERO_PATTERN:
1454   case MAT_NEW_NONZERO_LOCATION_ERR:
1455     MatCheckPreallocated(A, 1);
1456     PetscCall(MatSetOption(a->A, op, flg));
1457     PetscCall(MatSetOption(a->B, op, flg));
1458     break;
1459   case MAT_ROW_ORIENTED:
1460     MatCheckPreallocated(A, 1);
1461     a->roworiented = flg;
1462 
1463     PetscCall(MatSetOption(a->A, op, flg));
1464     PetscCall(MatSetOption(a->B, op, flg));
1465     break;
1466   case MAT_FORCE_DIAGONAL_ENTRIES:
1467   case MAT_SORTED_FULL:
1468     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1469     break;
1470   case MAT_IGNORE_OFF_PROC_ENTRIES:
1471     a->donotstash = flg;
1472     break;
1473   case MAT_USE_HASH_TABLE:
1474     a->ht_flag = flg;
1475     a->ht_fact = 1.39;
1476     break;
1477   case MAT_SYMMETRIC:
1478   case MAT_STRUCTURALLY_SYMMETRIC:
1479   case MAT_HERMITIAN:
1480   case MAT_SUBMAT_SINGLEIS:
1481   case MAT_SYMMETRY_ETERNAL:
1482   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1483   case MAT_SPD_ETERNAL:
1484     /* if the diagonal block is square it inherits some of the properties above */
1485     break;
1486   default:
1487     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op);
1488   }
1489   PetscFunctionReturn(PETSC_SUCCESS);
1490 }
1491 
1492 static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout)
1493 {
1494   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
1495   Mat_SeqBAIJ *Aloc;
1496   Mat          B;
1497   PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
1498   PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
1499   MatScalar   *a;
1500 
1501   PetscFunctionBegin;
1502   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1503   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1504     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1505     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1506     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1507     /* Do not know preallocation information, but must set block size */
1508     PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
1509   } else {
1510     B = *matout;
1511   }
1512 
1513   /* copy over the A part */
1514   Aloc = (Mat_SeqBAIJ *)baij->A->data;
1515   ai   = Aloc->i;
1516   aj   = Aloc->j;
1517   a    = Aloc->a;
1518   PetscCall(PetscMalloc1(bs, &rvals));
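  /* rvals holds the bs global scalar row indices of block row i of A; they become the column indices of the transposed entries inserted into B */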
1519 
1520   for (i = 0; i < mbs; i++) {
1521     rvals[0] = bs * (baij->rstartbs + i);
1522     for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1523     for (j = ai[i]; j < ai[i + 1]; j++) {
1524       col = (baij->cstartbs + aj[j]) * bs;
1525       for (k = 0; k < bs; k++) {
1526         PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
1527 
1528         col++;
1529         a += bs;
1530       }
1531     }
1532   }
1533   /* copy over the B part */
1534   Aloc = (Mat_SeqBAIJ *)baij->B->data;
1535   ai   = Aloc->i;
1536   aj   = Aloc->j;
1537   a    = Aloc->a;
1538   for (i = 0; i < mbs; i++) {
1539     rvals[0] = bs * (baij->rstartbs + i);
1540     for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1541     for (j = ai[i]; j < ai[i + 1]; j++) {
1542       col = baij->garray[aj[j]] * bs;
1543       for (k = 0; k < bs; k++) {
1544         PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
1545         col++;
1546         a += bs;
1547       }
1548     }
1549   }
1550   PetscCall(PetscFree(rvals));
1551   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1552   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1553 
1554   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
1555   else PetscCall(MatHeaderMerge(A, &B));
1556   PetscFunctionReturn(PETSC_SUCCESS);
1557 }
1558 
1559 static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr)
1560 {
1561   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1562   Mat          a = baij->A, b = baij->B;
1563   PetscInt     s1, s2, s3;
1564 
1565   PetscFunctionBegin;
1566   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1567   if (rr) {
1568     PetscCall(VecGetLocalSize(rr, &s1));
1569     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1570     /* Overlap communication with computation. */
1571     PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1572   }
1573   if (ll) {
1574     PetscCall(VecGetLocalSize(ll, &s1));
1575     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1576     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1577   }
1578   /* scale the diagonal block */
1579   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1580 
1581   if (rr) {
1582     /* Do a scatter end and then right scale the off-diagonal block */
1583     PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1584     PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1585   }
1586   PetscFunctionReturn(PETSC_SUCCESS);
1587 }
1588 
1589 static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
1590 {
1591   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1592   PetscInt    *lrows;
1593   PetscInt     r, len;
1594   PetscBool    cong;
1595 
1596   PetscFunctionBegin;
1597   /* get locally owned rows */
1598   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
1599   /* fix right-hand side if needed */
1600   if (x && b) {
1601     const PetscScalar *xx;
1602     PetscScalar       *bb;
1603 
1604     PetscCall(VecGetArrayRead(x, &xx));
1605     PetscCall(VecGetArray(b, &bb));
1606     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
1607     PetscCall(VecRestoreArrayRead(x, &xx));
1608     PetscCall(VecRestoreArray(b, &bb));
1609   }
1610 
1611   /* actually zap the local rows */
1612   /*
1613      Zero the required rows. If the "diagonal block" of the matrix is
1614      square and the user wishes to set the diagonal, we use separate code
1615      so that MatSetValues() is not called for each diagonal entry, which
1616      would allocate new memory, triggering many mallocs and slowing things down.
1617   */
1619   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1620   PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
1621   PetscCall(MatHasCongruentLayouts(A, &cong));
1622   if ((diag != 0.0) && cong) {
1623     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
1624   } else if (diag != 0.0) {
1625     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
1626     PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options MAT_NEW_NONZERO_LOCATIONS, MAT_NEW_NONZERO_LOCATION_ERR, and MAT_NEW_NONZERO_ALLOCATION_ERR");
1627     for (r = 0; r < len; ++r) {
1628       const PetscInt row = lrows[r] + A->rmap->rstart;
1629       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
1630     }
1631     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1632     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1633   } else {
1634     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
1635   }
1636   PetscCall(PetscFree(lrows));
1637 
1638   /* only change matrix nonzero state if pattern was allowed to be changed */
1639   if (!((Mat_SeqBAIJ *)l->A->data)->keepnonzeropattern || !((Mat_SeqBAIJ *)l->A->data)->nonew) {
1640     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1641     PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1642   }
1643   PetscFunctionReturn(PETSC_SUCCESS);
1644 }
1645 
1646 static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
1647 {
1648   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
1649   PetscMPIInt        n = A->rmap->n, p = 0;
1650   PetscInt           i, j, k, r, len = 0, row, col, count;
1651   PetscInt          *lrows, *owners = A->rmap->range;
1652   PetscSFNode       *rrows;
1653   PetscSF            sf;
1654   const PetscScalar *xx;
1655   PetscScalar       *bb, *mask;
1656   Vec                xmask, lmask;
1657   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
1658   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1659   PetscScalar       *aa;
1660 
1661   PetscFunctionBegin;
1662   /* Create SF where leaves are input rows and roots are owned rows */
1663   PetscCall(PetscMalloc1(n, &lrows));
1664   for (r = 0; r < n; ++r) lrows[r] = -1;
1665   PetscCall(PetscMalloc1(N, &rrows));
1666   for (r = 0; r < N; ++r) {
1667     const PetscInt idx = rows[r];
1668     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
1669     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
1670       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
1671     }
1672     rrows[r].rank  = p;
1673     rrows[r].index = rows[r] - owners[p];
1674   }
1675   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1676   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
1677   /* Collect flags for rows to be zeroed */
1678   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
1679   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
1680   PetscCall(PetscSFDestroy(&sf));
1681   /* Compress and put in row numbers */
1682   for (r = 0; r < n; ++r)
1683     if (lrows[r] >= 0) lrows[len++] = r;
1684   /* zero diagonal part of matrix */
1685   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
1686   /* handle off-diagonal part of matrix */
1687   PetscCall(MatCreateVecs(A, &xmask, NULL));
1688   PetscCall(VecDuplicate(l->lvec, &lmask));
1689   PetscCall(VecGetArray(xmask, &bb));
1690   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
1691   PetscCall(VecRestoreArray(xmask, &bb));
1692   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
1693   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
1694   PetscCall(VecDestroy(&xmask));
1695   if (x) {
1696     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
1697     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
1698     PetscCall(VecGetArrayRead(l->lvec, &xx));
1699     PetscCall(VecGetArray(b, &bb));
1700   }
1701   PetscCall(VecGetArray(lmask, &mask));
1702   /* remove zeroed rows of off-diagonal matrix */
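  /* BAIJ blocks are stored column-major (see the bs stride below), so the entries of scalar row (row % bs) within each bs x bs block start at offset row % bs and are bs apart */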
1703   for (i = 0; i < len; ++i) {
1704     row   = lrows[i];
1705     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
1706     aa    = ((MatScalar *)baij->a) + baij->i[row / bs] * bs2 + (row % bs);
1707     for (k = 0; k < count; ++k) {
1708       aa[0] = 0.0;
1709       aa += bs;
1710     }
1711   }
1712   /* loop over all elements of the off-process part of the matrix, zeroing the removed columns */
1713   for (i = 0; i < l->B->rmap->N; ++i) {
1714     row = i / bs;
1715     for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
1716       for (k = 0; k < bs; ++k) {
1717         col = bs * baij->j[j] + k;
1718         if (PetscAbsScalar(mask[col])) {
1719           aa = ((MatScalar *)baij->a) + j * bs2 + (i % bs) + bs * k;
1720           if (x) bb[i] -= aa[0] * xx[col];
1721           aa[0] = 0.0;
1722         }
1723       }
1724     }
1725   }
1726   if (x) {
1727     PetscCall(VecRestoreArray(b, &bb));
1728     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1729   }
1730   PetscCall(VecRestoreArray(lmask, &mask));
1731   PetscCall(VecDestroy(&lmask));
1732   PetscCall(PetscFree(lrows));
1733 
1734   /* only change matrix nonzero state if pattern was allowed to be changed */
1735   if (!((Mat_SeqBAIJ *)l->A->data)->nonew) {
1736     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1737     PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1738   }
1739   PetscFunctionReturn(PETSC_SUCCESS);
1740 }
1741 
1742 static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1743 {
1744   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1745 
1746   PetscFunctionBegin;
1747   PetscCall(MatSetUnfactored(a->A));
1748   PetscFunctionReturn(PETSC_SUCCESS);
1749 }
1750 
1751 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);
1752 
1753 static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag)
1754 {
1755   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
1756   Mat          a, b, c, d;
1757   PetscBool    flg;
1758 
1759   PetscFunctionBegin;
1760   a = matA->A;
1761   b = matA->B;
1762   c = matB->A;
1763   d = matB->B;
1764 
1765   PetscCall(MatEqual(a, c, &flg));
1766   if (flg) PetscCall(MatEqual(b, d, &flg));
1767   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1768   PetscFunctionReturn(PETSC_SUCCESS);
1769 }
1770 
1771 static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str)
1772 {
1773   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1774   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
1775 
1776   PetscFunctionBegin;
1777   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1778   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1779     PetscCall(MatCopy_Basic(A, B, str));
1780   } else {
1781     PetscCall(MatCopy(a->A, b->A, str));
1782     PetscCall(MatCopy(a->B, b->B, str));
1783   }
1784   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1785   PetscFunctionReturn(PETSC_SUCCESS);
1786 }
1787 
1788 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
1789 {
1790   PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs;
1791   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
1792   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
1793 
1794   PetscFunctionBegin;
1795   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
1796   PetscFunctionReturn(PETSC_SUCCESS);
1797 }
1798 
1799 static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
1800 {
1801   Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
1802   PetscBLASInt bnz, one                         = 1;
1803   Mat_SeqBAIJ *x, *y;
1804   PetscInt     bs2 = Y->rmap->bs * Y->rmap->bs;
1805 
1806   PetscFunctionBegin;
1807   if (str == SAME_NONZERO_PATTERN) {
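    /* identical nonzero patterns: the raw value arrays line up entry for entry, so one BLAS axpy per part (diagonal and off-diagonal) suffices */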
1808     PetscScalar alpha = a;
1809     x                 = (Mat_SeqBAIJ *)xx->A->data;
1810     y                 = (Mat_SeqBAIJ *)yy->A->data;
1811     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
1812     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
1813     x = (Mat_SeqBAIJ *)xx->B->data;
1814     y = (Mat_SeqBAIJ *)yy->B->data;
1815     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
1816     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
1817     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1818   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1819     PetscCall(MatAXPY_Basic(Y, a, X, str));
1820   } else {
1821     Mat       B;
1822     PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
1823     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
1824     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
1825     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
1826     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
1827     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
1828     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
1829     PetscCall(MatSetType(B, MATMPIBAIJ));
1830     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d));
1831     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
1832     PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
1833     /* MatAXPY_BasicWithPreallocation() for a BAIJ matrix is much slower than for AIJ, even for bs=1! */
1834     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
1835     PetscCall(MatHeaderMerge(Y, &B));
1836     PetscCall(PetscFree(nnz_d));
1837     PetscCall(PetscFree(nnz_o));
1838   }
1839   PetscFunctionReturn(PETSC_SUCCESS);
1840 }
1841 
1842 static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
1843 {
1844   PetscFunctionBegin;
1845   if (PetscDefined(USE_COMPLEX)) {
1846     Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;
1847 
1848     PetscCall(MatConjugate_SeqBAIJ(a->A));
1849     PetscCall(MatConjugate_SeqBAIJ(a->B));
1850   }
1851   PetscFunctionReturn(PETSC_SUCCESS);
1852 }
1853 
1854 static PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1855 {
1856   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1857 
1858   PetscFunctionBegin;
1859   PetscCall(MatRealPart(a->A));
1860   PetscCall(MatRealPart(a->B));
1861   PetscFunctionReturn(PETSC_SUCCESS);
1862 }
1863 
1864 static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1865 {
1866   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1867 
1868   PetscFunctionBegin;
1869   PetscCall(MatImaginaryPart(a->A));
1870   PetscCall(MatImaginaryPart(a->B));
1871   PetscFunctionReturn(PETSC_SUCCESS);
1872 }
1873 
1874 static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1875 {
1876   IS       iscol_local;
1877   PetscInt csize;
1878 
1879   PetscFunctionBegin;
1880   PetscCall(ISGetLocalSize(iscol, &csize));
1881   if (call == MAT_REUSE_MATRIX) {
1882     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1883     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1884   } else {
1885     PetscCall(ISAllGather(iscol, &iscol_local));
1886   }
1887   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE));
1888   if (call == MAT_INITIAL_MATRIX) {
1889     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1890     PetscCall(ISDestroy(&iscol_local));
1891   }
1892   PetscFunctionReturn(PETSC_SUCCESS);
1893 }
1894 
1895 /*
1896   Not great since it makes two copies of the submatrix: first a local SeqBAIJ
1897   on each process, and then the end result by concatenating the local matrices.
1898   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1899   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
1900 */
1901 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym)
1902 {
1903   PetscMPIInt  rank, size;
1904   PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
1905   PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
1906   Mat          M, Mreuse;
1907   MatScalar   *vwork, *aa;
1908   MPI_Comm     comm;
1909   IS           isrow_new, iscol_new;
1910   Mat_SeqBAIJ *aij;
1911 
1912   PetscFunctionBegin;
1913   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
1914   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1915   PetscCallMPI(MPI_Comm_size(comm, &size));
1916   /* The compression and expansion should be avoided. It does not point
1917      out errors and might change the indices, hence is buggy */
1918   PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
1919   if (isrow == iscol) {
1920     iscol_new = isrow_new;
1921     PetscCall(PetscObjectReference((PetscObject)iscol_new));
1922   } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));
1923 
1924   if (call == MAT_REUSE_MATRIX) {
1925     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
1926     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1927     PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym));
1928   } else {
1929     PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym));
1930   }
1931   PetscCall(ISDestroy(&isrow_new));
1932   PetscCall(ISDestroy(&iscol_new));
1933   /*
1934       m - number of local rows
1935       n - number of columns (same on all processors)
1936       rstart - first row in new global matrix generated
1937   */
1938   PetscCall(MatGetBlockSize(mat, &bs));
1939   PetscCall(MatGetSize(Mreuse, &m, &n));
1940   m = m / bs;
1941   n = n / bs;
1942 
1943   if (call == MAT_INITIAL_MATRIX) {
1944     aij = (Mat_SeqBAIJ *)(Mreuse)->data;
1945     ii  = aij->i;
1946     jj  = aij->j;
1947 
1948     /*
1949         Determine the number of non-zeros in the diagonal and off-diagonal
1950         portions of the matrix in order to do correct preallocation
1951     */
1952 
1953     /* first get start and end of "diagonal" columns */
1954     if (csize == PETSC_DECIDE) {
1955       PetscCall(ISGetSize(isrow, &mglobal));
1956       if (mglobal == n * bs) { /* square matrix */
1957         nlocal = m;
1958       } else {
1959         nlocal = n / size + ((n % size) > rank);
1960       }
1961     } else {
1962       nlocal = csize / bs;
1963     }
1964     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
1965     rstart = rend - nlocal;
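    /* MPI_Scan produced the inclusive prefix sum of nlocal, i.e. this process's ending block column rend; rstart follows by subtraction */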
1966     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
1967 
1968     /* next, compute all the lengths */
1969     PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
1970     for (i = 0; i < m; i++) {
1971       jend = ii[i + 1] - ii[i];
1972       olen = 0;
1973       dlen = 0;
1974       for (j = 0; j < jend; j++) {
1975         if (*jj < rstart || *jj >= rend) olen++;
1976         else dlen++;
1977         jj++;
1978       }
1979       olens[i] = olen;
1980       dlens[i] = dlen;
1981     }
1982     PetscCall(MatCreate(comm, &M));
1983     PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
1984     PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ));
1985     PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
1986     PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
1987     PetscCall(PetscFree2(dlens, olens));
1988   } else {
1989     PetscInt ml, nl;
1990 
1991     M = *newmat;
1992     PetscCall(MatGetLocalSize(M, &ml, &nl));
1993     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
1994     PetscCall(MatZeroEntries(M));
1995     /*
1996          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
1997        rather than the slower MatSetValuesBlocked().
1998     */
1999     M->was_assembled = PETSC_TRUE;
2000     M->assembled     = PETSC_FALSE;
2001   }
2002   PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
2003   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
2004   aij = (Mat_SeqBAIJ *)(Mreuse)->data;
2005   ii  = aij->i;
2006   jj  = aij->j;
2007   aa  = aij->a;
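  /* walk the merged local submatrix row by row, advancing cwork/vwork through the CSR arrays and inserting one block row at a time */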
2008   for (i = 0; i < m; i++) {
2009     row   = rstart / bs + i;
2010     nz    = ii[i + 1] - ii[i];
2011     cwork = jj;
2012     jj    = PetscSafePointerPlusOffset(jj, nz);
2013     vwork = aa;
2014     aa    = PetscSafePointerPlusOffset(aa, nz * bs * bs);
2015     PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
2016   }
2017 
2018   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
2019   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
2020   *newmat = M;
2021 
2022   /* save submatrix used in processor for next request */
2023   if (call == MAT_INITIAL_MATRIX) {
2024     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
2025     PetscCall(PetscObjectDereference((PetscObject)Mreuse));
2026   }
2027   PetscFunctionReturn(PETSC_SUCCESS);
2028 }
2029 
2030 static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B)
2031 {
2032   MPI_Comm        comm, pcomm;
2033   PetscInt        clocal_size, nrows;
2034   const PetscInt *rows;
2035   PetscMPIInt     size;
2036   IS              crowp, lcolp;
2037 
2038   PetscFunctionBegin;
2039   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2040   /* make a collective version of 'rowp' */
2041   PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
2042   if (pcomm == comm) {
2043     crowp = rowp;
2044   } else {
2045     PetscCall(ISGetSize(rowp, &nrows));
2046     PetscCall(ISGetIndices(rowp, &rows));
2047     PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
2048     PetscCall(ISRestoreIndices(rowp, &rows));
2049   }
2050   PetscCall(ISSetPermutation(crowp));
2051   /* make a local version of 'colp' */
2052   PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
2053   PetscCallMPI(MPI_Comm_size(pcomm, &size));
2054   if (size == 1) {
2055     lcolp = colp;
2056   } else {
2057     PetscCall(ISAllGather(colp, &lcolp));
2058   }
2059   PetscCall(ISSetPermutation(lcolp));
2060   /* now we just get the submatrix */
2061   PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
2062   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE));
2063   /* clean up */
2064   if (pcomm != comm) PetscCall(ISDestroy(&crowp));
2065   if (size > 1) PetscCall(ISDestroy(&lcolp));
2066   PetscFunctionReturn(PETSC_SUCCESS);
2067 }
2068 
2069 static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
2070 {
2071   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
2072   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
2073 
2074   PetscFunctionBegin;
2075   if (nghosts) *nghosts = B->nbs;
2076   if (ghosts) *ghosts = baij->garray;
2077   PetscFunctionReturn(PETSC_SUCCESS);
2078 }
2079 
2080 static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat)
2081 {
2082   Mat          B;
2083   Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
2084   Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
2085   Mat_SeqAIJ  *b;
2086   PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
2087   PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
2088   PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;
2089 
2090   PetscFunctionBegin;
2091   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2092   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2093 
2094   /*   Tell every processor the number of nonzeros per row  */
2095   PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
2096   for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
2097   PetscCall(PetscMalloc1(2 * size, &recvcounts));
2098   displs = recvcounts + size;
2099   for (i = 0; i < size; i++) {
2100     recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs;
2101     displs[i]     = A->rmap->range[i] / bs;
2102   }
2103   PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
2104   /* Create the sequential matrix of the same type as the local block diagonal  */
2105   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
2106   PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
2107   PetscCall(MatSetType(B, MATSEQAIJ));
2108   PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
2109   b = (Mat_SeqAIJ *)B->data;
2110 
2111   /*     Copy my part of matrix column indices over  */
2112   sendcount  = ad->nz + bd->nz;
2113   jsendbuf   = b->j + b->i[rstarts[rank] / bs];
2114   a_jsendbuf = ad->j;
2115   b_jsendbuf = bd->j;
2116   n          = A->rmap->rend / bs - A->rmap->rstart / bs;
2117   cnt        = 0;
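  /* merge each block row into globally increasing column order: B entries whose global block column precedes the diagonal block, then the A entries, then the remaining B entries */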
2118   for (i = 0; i < n; i++) {
2119     /* put in lower diagonal portion */
2120     m = bd->i[i + 1] - bd->i[i];
2121     while (m > 0) {
2122       /* is it above diagonal (in bd (compressed) numbering) */
2123       if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
2124       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2125       m--;
2126     }
2127 
2128     /* put in diagonal portion */
2129     for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;
2130 
2131     /* put in upper diagonal portion */
2132     while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
2133   }
2134   PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);
2135 
2136   /*  Gather all column indices to all processors  */
2137   for (i = 0; i < size; i++) {
2138     recvcounts[i] = 0;
2139     for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
2140   }
2141   displs[0] = 0;
2142   for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
2143   PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
2144   /*  Assemble the matrix into usable form (note numerical values not yet set)  */
2145   /* set the b->ilen (length of each row) values */
2146   PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
2147   /* set the b->i indices */
2148   b->i[0] = 0;
2149   for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
2150   PetscCall(PetscFree(lens));
2151   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2152   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2153   PetscCall(PetscFree(recvcounts));
2154 
2155   PetscCall(MatPropagateSymmetryOptions(A, B));
2156   *newmat = B;
2157   PetscFunctionReturn(PETSC_SUCCESS);
2158 }
2159 
2160 static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
2161 {
2162   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
2163   Vec          bb1 = NULL;
2164 
2165   PetscFunctionBegin;
2166   if (flag == SOR_APPLY_UPPER) {
2167     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2168     PetscFunctionReturn(PETSC_SUCCESS);
2169   }
2170 
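  /* bb1 will hold the updated rhs bb - B*x; it is needed unless a single sweep with a zero initial guess is requested (~flag tests for an unset bit) */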
2171   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));
2172 
2173   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2174     if (flag & SOR_ZERO_INITIAL_GUESS) {
2175       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2176       its--;
2177     }
2178 
2179     while (its--) {
2180       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2181       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2182 
2183       /* update rhs: bb1 = bb - B*x */
2184       PetscCall(VecScale(mat->lvec, -1.0));
2185       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2186 
2187       /* local sweep */
2188       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
2189     }
2190   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2191     if (flag & SOR_ZERO_INITIAL_GUESS) {
2192       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2193       its--;
2194     }
2195     while (its--) {
2196       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2197       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2198 
2199       /* update rhs: bb1 = bb - B*x */
2200       PetscCall(VecScale(mat->lvec, -1.0));
2201       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2202 
2203       /* local sweep */
2204       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
2205     }
2206   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2207     if (flag & SOR_ZERO_INITIAL_GUESS) {
2208       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2209       its--;
2210     }
2211     while (its--) {
2212       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2213       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2214 
2215       /* update rhs: bb1 = bb - B*x */
2216       PetscCall(VecScale(mat->lvec, -1.0));
2217       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2218 
2219       /* local sweep */
2220       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
2221     }
2222   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Requested parallel SOR variant not supported");
2223 
2224   PetscCall(VecDestroy(&bb1));
2225   PetscFunctionReturn(PETSC_SUCCESS);
2226 }
2227 
2228 static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions)
2229 {
2230   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data;
2231   PetscInt     m, N, i, *garray = aij->garray;
2232   PetscInt     ib, jb, bs = A->rmap->bs;
2233   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data;
2234   MatScalar   *a_val = a_aij->a;
2235   Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data;
2236   MatScalar   *b_val = b_aij->a;
2237   PetscReal   *work;
2238 
2239   PetscFunctionBegin;
2240   PetscCall(MatGetSize(A, &m, &N));
2241   PetscCall(PetscCalloc1(N, &work));
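  /* work accumulates one reduction value per global scalar column; the jb (block column) / ib (block row) loops traverse each bs x bs block in storage order */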
2242   if (type == NORM_2) {
2243     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2244       for (jb = 0; jb < bs; jb++) {
2245         for (ib = 0; ib < bs; ib++) {
2246           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2247           a_val++;
2248         }
2249       }
2250     }
2251     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2252       for (jb = 0; jb < bs; jb++) {
2253         for (ib = 0; ib < bs; ib++) {
2254           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2255           b_val++;
2256         }
2257       }
2258     }
2259   } else if (type == NORM_1) {
2260     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2261       for (jb = 0; jb < bs; jb++) {
2262         for (ib = 0; ib < bs; ib++) {
2263           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2264           a_val++;
2265         }
2266       }
2267     }
2268     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2269       for (jb = 0; jb < bs; jb++) {
2270         for (ib = 0; ib < bs; ib++) {
2271           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2272           b_val++;
2273         }
2274       }
2275     }
2276   } else if (type == NORM_INFINITY) {
2277     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2278       for (jb = 0; jb < bs; jb++) {
2279         for (ib = 0; ib < bs; ib++) {
2280           PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb; /* PetscInt rather than int so the index is safe with 64-bit PetscInt builds */
2281           work[col]    = PetscMax(PetscAbsScalar(*a_val), work[col]);
2282           a_val++;
2283         }
2284       }
2285     }
2286     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2287       for (jb = 0; jb < bs; jb++) {
2288         for (ib = 0; ib < bs; ib++) {
2289           PetscInt col = garray[b_aij->j[i]] * bs + jb;
2290           work[col]    = PetscMax(PetscAbsScalar(*b_val), work[col]);
2291           b_val++;
2292         }
2293       }
2294     }
2295   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
2296     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2297       for (jb = 0; jb < bs; jb++) {
2298         for (ib = 0; ib < bs; ib++) {
2299           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
2300           a_val++;
2301         }
2302       }
2303     }
2304     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2305       for (jb = 0; jb < bs; jb++) {
2306         for (ib = 0; ib < bs; ib++) {
2307           work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
2308           b_val++;
2309         }
2310       }
2311     }
2312   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2313     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2314       for (jb = 0; jb < bs; jb++) {
2315         for (ib = 0; ib < bs; ib++) {
2316           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
2317           a_val++;
2318         }
2319       }
2320     }
2321     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2322       for (jb = 0; jb < bs; jb++) {
2323         for (ib = 0; ib < bs; ib++) {
2324           work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
2325           b_val++;
2326         }
2327       }
2328     }
2329   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
2330   if (type == NORM_INFINITY) {
2331     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
2332   } else {
2333     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
2334   }
2335   PetscCall(PetscFree(work));
2336   if (type == NORM_2) {
2337     for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2338   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2339     for (i = 0; i < N; i++) reductions[i] /= m;
2340   }
2341   PetscFunctionReturn(PETSC_SUCCESS);
2342 }
2343 
2344 static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values)
2345 {
2346   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2347 
2348   PetscFunctionBegin;
2349   PetscCall(MatInvertBlockDiagonal(a->A, values));
2350   A->factorerrortype             = a->A->factorerrortype;
2351   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2352   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2353   PetscFunctionReturn(PETSC_SUCCESS);
2354 }
2355 
2356 static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a)
2357 {
2358   Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
2359   Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ *)maij->A->data;
2360 
2361   PetscFunctionBegin;
2362   if (!Y->preallocated) {
2363     PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
2364   } else if (!aij->nz) {
2365     PetscInt nonew = aij->nonew;
2366     PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
2367     aij->nonew = nonew;
2368   }
2369   PetscCall(MatShift_Basic(Y, a));
2370   PetscFunctionReturn(PETSC_SUCCESS);
2371 }
2372 
2373 static PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d)
2374 {
2375   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2376 
2377   PetscFunctionBegin;
2378   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2379   PetscCall(MatMissingDiagonal(a->A, missing, d));
2380   if (d) {
2381     PetscInt rstart;
2382     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2383     *d += rstart / A->rmap->bs;
2384   }
2385   PetscFunctionReturn(PETSC_SUCCESS);
2386 }
2387 
2388 static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a)
2389 {
2390   PetscFunctionBegin;
2391   *a = ((Mat_MPIBAIJ *)A->data)->A;
2392   PetscFunctionReturn(PETSC_SUCCESS);
2393 }
2394 
2395 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep)
2396 {
2397   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2398 
2399   PetscFunctionBegin;
2400   PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2401   PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2402   PetscFunctionReturn(PETSC_SUCCESS);
2403 }
2404 
2405 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2406                                        MatGetRow_MPIBAIJ,
2407                                        MatRestoreRow_MPIBAIJ,
2408                                        MatMult_MPIBAIJ,
2409                                        /* 4*/ MatMultAdd_MPIBAIJ,
2410                                        MatMultTranspose_MPIBAIJ,
2411                                        MatMultTransposeAdd_MPIBAIJ,
2412                                        NULL,
2413                                        NULL,
2414                                        NULL,
2415                                        /*10*/ NULL,
2416                                        NULL,
2417                                        NULL,
2418                                        MatSOR_MPIBAIJ,
2419                                        MatTranspose_MPIBAIJ,
2420                                        /*15*/ MatGetInfo_MPIBAIJ,
2421                                        MatEqual_MPIBAIJ,
2422                                        MatGetDiagonal_MPIBAIJ,
2423                                        MatDiagonalScale_MPIBAIJ,
2424                                        MatNorm_MPIBAIJ,
2425                                        /*20*/ MatAssemblyBegin_MPIBAIJ,
2426                                        MatAssemblyEnd_MPIBAIJ,
2427                                        MatSetOption_MPIBAIJ,
2428                                        MatZeroEntries_MPIBAIJ,
2429                                        /*24*/ MatZeroRows_MPIBAIJ,
2430                                        NULL,
2431                                        NULL,
2432                                        NULL,
2433                                        NULL,
2434                                        /*29*/ MatSetUp_MPI_Hash,
2435                                        NULL,
2436                                        NULL,
2437                                        MatGetDiagonalBlock_MPIBAIJ,
2438                                        NULL,
2439                                        /*34*/ MatDuplicate_MPIBAIJ,
2440                                        NULL,
2441                                        NULL,
2442                                        NULL,
2443                                        NULL,
2444                                        /*39*/ MatAXPY_MPIBAIJ,
2445                                        MatCreateSubMatrices_MPIBAIJ,
2446                                        MatIncreaseOverlap_MPIBAIJ,
2447                                        MatGetValues_MPIBAIJ,
2448                                        MatCopy_MPIBAIJ,
2449                                        /*44*/ NULL,
2450                                        MatScale_MPIBAIJ,
2451                                        MatShift_MPIBAIJ,
2452                                        NULL,
2453                                        MatZeroRowsColumns_MPIBAIJ,
2454                                        /*49*/ NULL,
2455                                        NULL,
2456                                        NULL,
2457                                        NULL,
2458                                        NULL,
2459                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2460                                        NULL,
2461                                        MatSetUnfactored_MPIBAIJ,
2462                                        MatPermute_MPIBAIJ,
2463                                        MatSetValuesBlocked_MPIBAIJ,
2464                                        /*59*/ MatCreateSubMatrix_MPIBAIJ,
2465                                        MatDestroy_MPIBAIJ,
2466                                        MatView_MPIBAIJ,
2467                                        NULL,
2468                                        NULL,
2469                                        /*64*/ NULL,
2470                                        NULL,
2471                                        NULL,
2472                                        NULL,
2473                                        NULL,
2474                                        /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2475                                        NULL,
2476                                        NULL,
2477                                        NULL,
2478                                        NULL,
2479                                        /*74*/ NULL,
2480                                        MatFDColoringApply_BAIJ,
2481                                        NULL,
2482                                        NULL,
2483                                        NULL,
2484                                        /*79*/ NULL,
2485                                        NULL,
2486                                        NULL,
2487                                        NULL,
2488                                        MatLoad_MPIBAIJ,
2489                                        /*84*/ NULL,
2490                                        NULL,
2491                                        NULL,
2492                                        NULL,
2493                                        NULL,
2494                                        /*89*/ NULL,
2495                                        NULL,
2496                                        NULL,
2497                                        NULL,
2498                                        NULL,
2499                                        /*94*/ NULL,
2500                                        NULL,
2501                                        NULL,
2502                                        NULL,
2503                                        NULL,
2504                                        /*99*/ NULL,
2505                                        NULL,
2506                                        NULL,
2507                                        MatConjugate_MPIBAIJ,
2508                                        NULL,
2509                                        /*104*/ NULL,
2510                                        MatRealPart_MPIBAIJ,
2511                                        MatImaginaryPart_MPIBAIJ,
2512                                        NULL,
2513                                        NULL,
2514                                        /*109*/ NULL,
2515                                        NULL,
2516                                        NULL,
2517                                        NULL,
2518                                        MatMissingDiagonal_MPIBAIJ,
2519                                        /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ,
2520                                        NULL,
2521                                        MatGetGhosts_MPIBAIJ,
2522                                        NULL,
2523                                        NULL,
2524                                        /*119*/ NULL,
2525                                        NULL,
2526                                        NULL,
2527                                        NULL,
2528                                        MatGetMultiProcBlock_MPIBAIJ,
2529                                        /*124*/ NULL,
2530                                        MatGetColumnReductions_MPIBAIJ,
2531                                        MatInvertBlockDiagonal_MPIBAIJ,
2532                                        NULL,
2533                                        NULL,
2534                                        /*129*/ NULL,
2535                                        NULL,
2536                                        NULL,
2537                                        NULL,
2538                                        NULL,
2539                                        /*134*/ NULL,
2540                                        NULL,
2541                                        NULL,
2542                                        NULL,
2543                                        NULL,
2544                                        /*139*/ MatSetBlockSizes_Default,
2545                                        NULL,
2546                                        NULL,
2547                                        MatFDColoringSetUp_MPIXAIJ,
2548                                        NULL,
2549                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
2550                                        NULL,
2551                                        NULL,
2552                                        NULL,
2553                                        NULL,
2554                                        NULL,
2555                                        /*150*/ NULL,
2556                                        MatEliminateZeros_MPIBAIJ,
2557                                        NULL};
2558 
2559 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
2560 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
2561 
2562 static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
2563 {
2564   PetscInt        m, rstart, cstart, cend;
2565   PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
2566   const PetscInt *JJ          = NULL;
2567   PetscScalar    *values      = NULL;
2568   PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
2569   PetscBool       nooffprocentries;
2570 
2571   PetscFunctionBegin;
2572   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
2573   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
2574   PetscCall(PetscLayoutSetUp(B->rmap));
2575   PetscCall(PetscLayoutSetUp(B->cmap));
2576   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2577   m      = B->rmap->n / bs;
2578   rstart = B->rmap->rstart / bs;
2579   cstart = B->cmap->rstart / bs;
2580   cend   = B->cmap->rend / bs;
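  /* the layouts above are in scalar units; divide by bs to work in block rows/columns throughout */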
2581 
2582   PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
2583   PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
2584   for (i = 0; i < m; i++) {
2585     nz = ii[i + 1] - ii[i];
2586     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
2587     nz_max = PetscMax(nz_max, nz);
2588     dlen   = 0;
2589     olen   = 0;
2590     JJ     = jj + ii[i];
2591     for (j = 0; j < nz; j++) {
2592       if (*JJ < cstart || *JJ >= cend) olen++;
2593       else dlen++;
2594       JJ++;
2595     }
2596     d_nnz[i] = dlen;
2597     o_nnz[i] = olen;
2598   }
2599   PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
2600   PetscCall(PetscFree2(d_nnz, o_nnz));
2601 
2602   values = (PetscScalar *)V;
2603   if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
2604   for (i = 0; i < m; i++) {
2605     PetscInt        row   = i + rstart;
2606     PetscInt        ncols = ii[i + 1] - ii[i];
2607     const PetscInt *icols = jj + ii[i];
2608     if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2609       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
2610       PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
2611     } else { /* block ordering does not match so we can only insert one block at a time. */
2612       PetscInt j;
2613       for (j = 0; j < ncols; j++) {
2614         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
2615         PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
2616       }
2617     }
2618   }
2619 
2620   if (!V) PetscCall(PetscFree(values));
2621   nooffprocentries    = B->nooffprocentries;
2622   B->nooffprocentries = PETSC_TRUE;
2623   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2624   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2625   B->nooffprocentries = nooffprocentries;
2626 
2627   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2628   PetscFunctionReturn(PETSC_SUCCESS);
2629 }
2630 
2631 /*@C
2632   MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values
2633 
2634   Collective
2635 
2636   Input Parameters:
2637 + B  - the matrix
2638 . bs - the block size
2639 . i  - the indices into `j` for the start of each local row (starts with zero)
2640 . j  - the column indices for each local row (starts with zero); these must be sorted for each row
2641 - v  - optional values in the matrix, use `NULL` if not provided
2642 
2643   Level: advanced
2644 
2645   Notes:
2646   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
2647   thus you CANNOT change the matrix entries by changing the values of `v` after you have
2648   called this routine.
2649 
2650   The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
2651   may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs], where the second index is
2652   over rows within a block and the last index is over columns within a block.  Fortran programs will likely set
2653   `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz), in which the first index is over rows within a
2654   block and the second index is over columns within a block.
2655 
2656   Though this routine has Preallocation() in the name, it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
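
  Example Usage:
  A minimal sketch; `comm`, the local sizes `m` and `n` (multiples of the block size 2 used here), and the local block CSR triple `ii`, `jj`, `vv` are assumed to be set up by the caller:
.vb
  Mat B;
  PetscCall(MatCreate(comm, &B));
  PetscCall(MatSetSizes(B, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(B, MATMPIBAIJ));
  PetscCall(MatMPIBAIJSetPreallocationCSR(B, 2, ii, jj, vv)); /* each entry of vv holds a 2x2 block */
.ve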
2657 
2658 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ`
2659 @*/
2660 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2661 {
2662   PetscFunctionBegin;
2663   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2664   PetscValidType(B, 1);
2665   PetscValidLogicalCollectiveInt(B, bs, 2);
2666   PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
2667   PetscFunctionReturn(PETSC_SUCCESS);
2668 }
2669 
2670 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
2671 {
2672   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
2673   PetscInt     i;
2674   PetscMPIInt  size;
2675 
2676   PetscFunctionBegin;
2677   if (B->hash_active) {
2678     B->ops[0]      = b->cops;
2679     B->hash_active = PETSC_FALSE;
2680   }
2681   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
2682   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
2683   PetscCall(PetscLayoutSetUp(B->rmap));
2684   PetscCall(PetscLayoutSetUp(B->cmap));
2685   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2686 
2687   if (d_nnz) {
2688     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
2689   }
2690   if (o_nnz) {
2691     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
2692   }
2693 
2694   b->bs2 = bs * bs;
2695   b->mbs = B->rmap->n / bs;
2696   b->nbs = B->cmap->n / bs;
2697   b->Mbs = B->rmap->N / bs;
2698   b->Nbs = B->cmap->N / bs;
2699 
2700   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
2701   b->rstartbs = B->rmap->rstart / bs;
2702   b->rendbs   = B->rmap->rend / bs;
2703   b->cstartbs = B->cmap->rstart / bs;
2704   b->cendbs   = B->cmap->rend / bs;
2705 
2706 #if defined(PETSC_USE_CTABLE)
2707   PetscCall(PetscHMapIDestroy(&b->colmap));
2708 #else
2709   PetscCall(PetscFree(b->colmap));
2710 #endif
2711   PetscCall(PetscFree(b->garray));
2712   PetscCall(VecDestroy(&b->lvec));
2713   PetscCall(VecScatterDestroy(&b->Mvctx));
2714 
2715   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2716 
2717   MatSeqXAIJGetOptions_Private(b->B);
2718   PetscCall(MatDestroy(&b->B));
2719   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2720   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2721   PetscCall(MatSetType(b->B, MATSEQBAIJ));
2722   MatSeqXAIJRestoreOptions_Private(b->B);
2723 
2724   MatSeqXAIJGetOptions_Private(b->A);
2725   PetscCall(MatDestroy(&b->A));
2726   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2727   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2728   PetscCall(MatSetType(b->A, MATSEQBAIJ));
2729   MatSeqXAIJRestoreOptions_Private(b->A);
2730 
2731   PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
2732   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
2733   B->preallocated  = PETSC_TRUE;
2734   B->was_assembled = PETSC_FALSE;
2735   B->assembled     = PETSC_FALSE;
2736   PetscFunctionReturn(PETSC_SUCCESS);
2737 }
2738 
2739 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec);
2740 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal);
2741 
2742 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj)
2743 {
2744   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
2745   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data;
2746   PetscInt        M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs;
2747   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2748 
2749   PetscFunctionBegin;
2750   PetscCall(PetscMalloc1(M + 1, &ii));
2751   ii[0] = 0;
2752   for (i = 0; i < M; i++) {
2753     PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]);
2754     PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]);
2755     ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i];
2756     /* remove one from the count if this block row contains a diagonal block */
2757     for (j = id[i]; j < id[i + 1]; j++) {
2758       if (jd[j] == i) {
2759         ii[i + 1]--;
2760         break;
2761       }
2762     }
2763   }
2764   PetscCall(PetscMalloc1(ii[M], &jj));
2765   cnt = 0;
2766   for (i = 0; i < M; i++) {
2767     for (j = io[i]; j < io[i + 1]; j++) {
2768       if (garray[jo[j]] > rstart) break;
2769       jj[cnt++] = garray[jo[j]];
2770     }
2771     for (k = id[i]; k < id[i + 1]; k++) {
2772       if (jd[k] != i) jj[cnt++] = rstart + jd[k];
2773     }
2774     for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]];
2775   }
2776   PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj));
2777   PetscFunctionReturn(PETSC_SUCCESS);
2778 }
2779 
2780 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2781 
2782 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
2783 
2784 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
2785 {
2786   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2787   Mat_MPIAIJ  *b;
2788   Mat          B;
2789 
2790   PetscFunctionBegin;
2791   PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");
2792 
2793   if (reuse == MAT_REUSE_MATRIX) {
2794     B = *newmat;
2795   } else {
2796     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
2797     PetscCall(MatSetType(B, MATMPIAIJ));
2798     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
2799     PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
2800     PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
2801     PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
2802   }
2803   b = (Mat_MPIAIJ *)B->data;
2804 
2805   if (reuse == MAT_REUSE_MATRIX) {
2806     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
2807     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
2808   } else {
2809     PetscInt   *garray = a->garray;
2810     Mat_SeqAIJ *bB;
2811     PetscInt    bs, nnz;
2812     PetscCall(MatDestroy(&b->A));
2813     PetscCall(MatDestroy(&b->B));
2814     /* just clear out the data structure */
2815     PetscCall(MatDisAssemble_MPIAIJ(B));
2816     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
2817     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));
2818 
2819     /* Global numbering for b->B columns */
2820     bB  = (Mat_SeqAIJ *)b->B->data;
2821     bs  = A->rmap->bs;
2822     nnz = bB->i[A->rmap->n];
2823     for (PetscInt k = 0; k < nnz; k++) {
2824       PetscInt bj = bB->j[k] / bs;
2825       PetscInt br = bB->j[k] % bs;
2826       bB->j[k]    = garray[bj] * bs + br;
2827     }
2828   }
2829   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2830   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2831 
2832   if (reuse == MAT_INPLACE_MATRIX) {
2833     PetscCall(MatHeaderReplace(A, &B));
2834   } else {
2835     *newmat = B;
2836   }
2837   PetscFunctionReturn(PETSC_SUCCESS);
2838 }
2839 
2840 /*MC
2841    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2842 
2843    Options Database Keys:
2844 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
2845 . -mat_block_size <bs> - set the blocksize used to store the matrix
2846 . -mat_baij_mult_version <version> - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
2847 - -mat_use_hash_table <fact> - set hash table factor
2848 
2849    Level: beginner
2850 
2851    Note:
2852     `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. In this case no
2853     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
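
   Example Usage:
   A short sketch of selecting this type at runtime; the global sizes `M` and `N` are placeholders:
.vb
   Mat A;
   PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N));
   PetscCall(MatSetFromOptions(A)); /* -mat_type mpibaij selects this type */
   PetscCall(MatSetUp(A));
.ve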
2854 
2855 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ()`
2856 M*/
2857 
2858 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);
2859 
2860 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2861 {
2862   Mat_MPIBAIJ *b;
2863   PetscBool    flg = PETSC_FALSE;
2864 
2865   PetscFunctionBegin;
2866   PetscCall(PetscNew(&b));
2867   B->data      = (void *)b;
2868   B->ops[0]    = MatOps_Values;
2869   B->assembled = PETSC_FALSE;
2870 
2871   B->insertmode = NOT_SET_VALUES;
2872   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
2873   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2874 
2875   /* build local table of row and column ownerships */
2876   PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));
2877 
2878   /* build cache for off-process entries formed during assembly */
2879   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
2880 
2881   b->donotstash  = PETSC_FALSE;
2882   b->colmap      = NULL;
2883   b->garray      = NULL;
2884   b->roworiented = PETSC_TRUE;
2885 
2886   /* stuff used in block assembly */
2887   b->barray = NULL;
2888 
2889   /* stuff used for matrix vector multiply */
2890   b->lvec  = NULL;
2891   b->Mvctx = NULL;
2892 
2893   /* stuff for MatGetRow() */
2894   b->rowindices   = NULL;
2895   b->rowvalues    = NULL;
2896   b->getrowactive = PETSC_FALSE;
2897 
2898   /* hash table stuff */
2899   b->ht           = NULL;
2900   b->hd           = NULL;
2901   b->ht_size      = 0;
2902   b->ht_flag      = PETSC_FALSE;
2903   b->ht_fact      = 0;
2904   b->ht_total_ct  = 0;
2905   b->ht_insert_ct = 0;
2906 
2907   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2908   b->ijonly = PETSC_FALSE;
2909 
2910   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
2911   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
2912   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
2913 #if defined(PETSC_HAVE_HYPRE)
2914   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
2915 #endif
2916   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
2917   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
2918   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
2919   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
2920   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
2921   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
2922   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
2923   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));
2924 
2925   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
2926   PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
2927   if (flg) {
2928     PetscReal fact = 1.39;
2929     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
2930     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
2931     if (fact <= 1.0) fact = 1.39;
2932     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
2933     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
2934   }
2935   PetscOptionsEnd();
2936   PetscFunctionReturn(PETSC_SUCCESS);
2937 }
2938 
2939 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2940 /*MC
2941    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2942 
2943    This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
2944    and `MATMPIBAIJ` otherwise.
2945 
2946    Options Database Keys:
2947 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`
2948 
2949   Level: beginner
2950 
2951 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
2952 M*/
2953 
2954 /*@C
2955   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
2956   (block compressed row).
2957 
2958   Collective
2959 
2960   Input Parameters:
2961 + B     - the matrix
2962 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2963           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2964 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2965            submatrix  (same for all local rows)
2966 . d_nnz - array containing the number of block nonzeros in the various block rows
2967            in the diagonal portion of the local submatrix (possibly different for each block
2968            row) or `NULL`.  If you plan to factor the matrix you must leave room for the diagonal entry and
2969            set it even if it is zero.
2970 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2971            submatrix (same for all local rows).
2972 - o_nnz - array containing the number of nonzeros in the various block rows of the
2973            off-diagonal portion of the local submatrix (possibly different for
2974            each block row) or `NULL`.
2975 
2976    If the *_nnz parameter is given then the *_nz parameter is ignored
2977 
2978   Options Database Keys:
2979 + -mat_block_size            - size of the blocks to use
2980 - -mat_use_hash_table <fact> - set hash table factor
2981 
2982   Level: intermediate
2983 
2984   Notes:
2985   For good matrix assembly performance
2986   the user should preallocate the matrix storage by setting the parameters
2987   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
2988   performance can be increased by more than a factor of 50.
2989 
2990   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2991   then it must be used on all processors that share the object for that argument.
2992 
2993   Storage Information:
2994   For a square global matrix we define each processor's diagonal portion
2995   to be its local rows and the corresponding columns (a square submatrix);
2996   each processor's off-diagonal portion encompasses the remainder of the
2997   local matrix (a rectangular submatrix).
2998 
2999   The user can specify preallocated storage for the diagonal part of
3000   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
3001   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3002   memory allocation.  Likewise, specify preallocated storage for the
3003   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3004 
3005   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3006   the figure below we depict these three local rows and all columns (0-11).
3007 
3008 .vb
3009            0 1 2 3 4 5 6 7 8 9 10 11
3010           --------------------------
3011    row 3  |o o o d d d o o o o  o  o
3012    row 4  |o o o d d d o o o o  o  o
3013    row 5  |o o o d d d o o o o  o  o
3014           --------------------------
3015 .ve
3016 
3017   Thus, any entries in the d locations are stored in the d (diagonal)
3018   submatrix, and any entries in the o locations are stored in the
3019   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3020   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3021 
3022   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3023   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3024   In general, for PDE problems in which most nonzeros are near the diagonal,
3025   one expects `d_nz` >> `o_nz`.
3026 
3027   You can call `MatGetInfo()` to get information on how effective the preallocation was,
3028   for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3029   You can also run with the option `-info` and look for messages with the string
3030   malloc in them to see if additional memory allocation was needed.
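
  Example Usage:
  A minimal sketch; the block size `bs`, local size `m` (a multiple of `bs`), and the per-block-row counts are illustrative placeholders:
.vb
  Mat B;
  PetscCall(MatCreate(PETSC_COMM_WORLD, &B));
  PetscCall(MatSetSizes(B, m, m, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(B, MATMPIBAIJ));
  /* at most 3 blocks per block row in the diagonal part and 1 in the off-diagonal part */
  PetscCall(MatMPIBAIJSetPreallocation(B, bs, 3, NULL, 1, NULL));
.ve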
3031 
3032 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
3033 @*/
3034 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
3035 {
3036   PetscFunctionBegin;
3037   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3038   PetscValidType(B, 1);
3039   PetscValidLogicalCollectiveInt(B, bs, 2);
3040   PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
3041   PetscFunctionReturn(PETSC_SUCCESS);
3042 }
3043 
3044 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
3045 /*@C
3046   MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
3047   (block compressed row).
3048 
3049   Collective
3050 
3051   Input Parameters:
3052 + comm  - MPI communicator
3053 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3054           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3055 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
3056           This value should be the same as the local size used in creating the
3057           y vector for the matrix-vector product y = Ax.
3058 . n     - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
3059           This value should be the same as the local size used in creating the
3060           x vector for the matrix-vector product y = Ax.
3061 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3062 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3063 . d_nz  - number of nonzero blocks per block row in diagonal portion of local
3064           submatrix  (same for all local rows)
3065 . d_nnz - array containing the number of nonzero blocks in the various block rows
3066           in the diagonal portion of the local submatrix (possibly different for each block
3067           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3068           and set it even if it is zero.
3069 . o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3070           submatrix (same for all local rows).
3071 - o_nnz - array containing the number of nonzero blocks in the various block rows of the
3072           off-diagonal portion of the local submatrix (possibly different for
3073           each block row) or NULL.
3074 
3075   Output Parameter:
3076 . A - the matrix
3077 
3078   Options Database Keys:
3079 + -mat_block_size            - size of the blocks to use
3080 - -mat_use_hash_table <fact> - set hash table factor
3081 
3082   Level: intermediate
3083 
3084   Notes:
3085   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3086   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3087   [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
3088 
3089   For good matrix assembly performance
3090   the user should preallocate the matrix storage by setting the parameters
3091   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3092   performance can be increased by more than a factor of 50.
3093 
3094   If the *_nnz parameter is given then the *_nz parameter is ignored
3095 
3096   A nonzero block is any block that has 1 or more nonzeros in it
3097 
3098   The user MUST specify either the local or global matrix dimensions
3099   (possibly both).
3100 
3101   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
3102   then it must be used on all processors that share the object for that argument.
3103 
3104   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
3105   `MatGetOwnershipRange()`,  `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
3106 
3107   Storage Information:
3108   For a square global matrix we define each processor's diagonal portion
3109   to be its local rows and the corresponding columns (a square submatrix);
3110   each processor's off-diagonal portion encompasses the remainder of the
3111   local matrix (a rectangular submatrix).
3112 
3113   The user can specify preallocated storage for the diagonal part of
3114   the local submatrix with either d_nz or d_nnz (not both).  Set
3115   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3116   memory allocation.  Likewise, specify preallocated storage for the
3117   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3118 
3119   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3120   the figure below we depict these three local rows and all columns (0-11).
3121 
3122 .vb
3123            0 1 2 3 4 5 6 7 8 9 10 11
3124           --------------------------
3125    row 3  |o o o d d d o o o o  o  o
3126    row 4  |o o o d d d o o o o  o  o
3127    row 5  |o o o d d d o o o o  o  o
3128           --------------------------
3129 .ve
3130 
3131   Thus, any entries in the d locations are stored in the d (diagonal)
3132   submatrix, and any entries in the o locations are stored in the
3133   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3134   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3135 
3136   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3137   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3138   In general, for PDE problems in which most nonzeros are near the diagonal,
3139   one expects `d_nz` >> `o_nz`.
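
  Example Usage:
  A brief sketch; `bs`, `m` (a multiple of `bs`), and the block counts 3 and 1 are illustrative placeholders:
.vb
  Mat A;
  PetscCall(MatCreateBAIJ(PETSC_COMM_WORLD, bs, m, m, PETSC_DETERMINE, PETSC_DETERMINE, 3, NULL, 1, NULL, &A));
.ve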
3140 
3141 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`,
3142           `MatGetOwnershipRange()`,  `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout`
3143 @*/
3144 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
3145 {
3146   PetscMPIInt size;
3147 
3148   PetscFunctionBegin;
3149   PetscCall(MatCreate(comm, A));
3150   PetscCall(MatSetSizes(*A, m, n, M, N));
3151   PetscCallMPI(MPI_Comm_size(comm, &size));
3152   if (size > 1) {
3153     PetscCall(MatSetType(*A, MATMPIBAIJ));
3154     PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
3155   } else {
3156     PetscCall(MatSetType(*A, MATSEQBAIJ));
3157     PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
3158   }
3159   PetscFunctionReturn(PETSC_SUCCESS);
3160 }
3161 
3162 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
3163 {
3164   Mat          mat;
3165   Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
3166   PetscInt     len = 0;
3167 
3168   PetscFunctionBegin;
3169   *newmat = NULL;
3170   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
3171   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
3172   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
3173 
3174   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
3175   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
3176   if (matin->hash_active) {
3177     PetscCall(MatSetUp(mat));
3178   } else {
3179     mat->factortype   = matin->factortype;
3180     mat->preallocated = PETSC_TRUE;
3181     mat->assembled    = PETSC_TRUE;
3182     mat->insertmode   = NOT_SET_VALUES;
3183 
3184     a             = (Mat_MPIBAIJ *)mat->data;
3185     mat->rmap->bs = matin->rmap->bs;
3186     a->bs2        = oldmat->bs2;
3187     a->mbs        = oldmat->mbs;
3188     a->nbs        = oldmat->nbs;
3189     a->Mbs        = oldmat->Mbs;
3190     a->Nbs        = oldmat->Nbs;
3191 
3192     a->size         = oldmat->size;
3193     a->rank         = oldmat->rank;
3194     a->donotstash   = oldmat->donotstash;
3195     a->roworiented  = oldmat->roworiented;
3196     a->rowindices   = NULL;
3197     a->rowvalues    = NULL;
3198     a->getrowactive = PETSC_FALSE;
3199     a->barray       = NULL;
3200     a->rstartbs     = oldmat->rstartbs;
3201     a->rendbs       = oldmat->rendbs;
3202     a->cstartbs     = oldmat->cstartbs;
3203     a->cendbs       = oldmat->cendbs;
3204 
3205     /* hash table stuff */
3206     a->ht           = NULL;
3207     a->hd           = NULL;
3208     a->ht_size      = 0;
3209     a->ht_flag      = oldmat->ht_flag;
3210     a->ht_fact      = oldmat->ht_fact;
3211     a->ht_total_ct  = 0;
3212     a->ht_insert_ct = 0;
3213 
3214     PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
3215     if (oldmat->colmap) {
3216 #if defined(PETSC_USE_CTABLE)
3217       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3218 #else
3219       PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
3220       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
3221 #endif
3222     } else a->colmap = NULL;
3223 
3224     if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) {
3225       PetscCall(PetscMalloc1(len, &a->garray));
3226       PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3227     } else a->garray = NULL;
3228 
3229     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
3230     PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3231     PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
3232 
3233     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3234     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3235   }
3236   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3237   *newmat = mat;
3238   PetscFunctionReturn(PETSC_SUCCESS);
3239 }
3240 
3241 /* Used for both MPIBAIJ and MPISBAIJ matrices */
3242 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
3243 {
3244   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3245   PetscInt    *rowidxs, *colidxs, rs, cs, ce;
3246   PetscScalar *matvals;
3247 
3248   PetscFunctionBegin;
3249   PetscCall(PetscViewerSetUp(viewer));
3250 
3251   /* read in matrix header */
3252   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3253   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3254   M  = header[1];
3255   N  = header[2];
3256   nz = header[3];
3257   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3258   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3259   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");
3260 
3261   /* set block sizes from the viewer's .info file */
3262   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3263   /* set local sizes if not set already */
3264   if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
3265   if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
3266   /* set global sizes if not set already */
3267   if (mat->rmap->N < 0) mat->rmap->N = M;
3268   if (mat->cmap->N < 0) mat->cmap->N = N;
3269   PetscCall(PetscLayoutSetUp(mat->rmap));
3270   PetscCall(PetscLayoutSetUp(mat->cmap));
3271 
3272   /* check if the matrix sizes are correct */
3273   PetscCall(MatGetSize(mat, &rows, &cols));
3274   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3275   PetscCall(MatGetBlockSize(mat, &bs));
3276   PetscCall(MatGetLocalSize(mat, &m, &n));
3277   PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
3278   PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
3279   mbs = m / bs;
3280   nbs = n / bs;
3281 
3282   /* read in row lengths and build row indices */
3283   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3284   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3285   rowidxs[0] = 0;
3286   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3287   PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3288   PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3289 
3290   /* read in column indices and matrix values */
3291   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3292   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3293   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3294 
3295   {                /* preallocate matrix storage */
3296     PetscBT    bt; /* helper bit set to count diagonal nonzeros */
3297     PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
3298     PetscBool  sbaij, done;
3299     PetscInt  *d_nnz, *o_nnz;
3300 
3301     PetscCall(PetscBTCreate(nbs, &bt));
3302     PetscCall(PetscHSetICreate(&ht));
3303     PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
3304     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
3305     for (i = 0; i < mbs; i++) {
3306       PetscCall(PetscBTMemzero(nbs, bt));
3307       PetscCall(PetscHSetIClear(ht));
3308       for (k = 0; k < bs; k++) {
3309         PetscInt row = bs * i + k;
3310         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3311           PetscInt col = colidxs[j];
3312           if (!sbaij || col >= row) {
3313             if (col >= cs && col < ce) {
3314               if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
3315             } else {
3316               PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
3317               if (done) o_nnz[i]++;
3318             }
3319           }
3320         }
3321       }
3322     }
3323     PetscCall(PetscBTDestroy(&bt));
3324     PetscCall(PetscHSetIDestroy(&ht));
3325     PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
3326     PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
3327     PetscCall(PetscFree2(d_nnz, o_nnz));
3328   }
3329 
3330   /* store matrix values */
3331   for (i = 0; i < m; i++) {
3332     PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
3333     PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
3334   }
3335 
3336   PetscCall(PetscFree(rowidxs));
3337   PetscCall(PetscFree2(colidxs, matvals));
3338   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3339   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3340   PetscFunctionReturn(PETSC_SUCCESS);
3341 }
3342 
3343 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
3344 {
3345   PetscBool isbinary;
3346 
3347   PetscFunctionBegin;
3348   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3349   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3350   PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
3351   PetscFunctionReturn(PETSC_SUCCESS);
3352 }
3353 
3354 /*@
3355   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrix's hash table
3356 
3357   Input Parameters:
3358 + mat  - the matrix
3359 - fact - factor
3360 
3361   Options Database Key:
3362 . -mat_use_hash_table <fact> - provide the factor
3363 
3364   Level: advanced
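
  Example Usage:
  A brief sketch; the factor value 1.6 is only illustrative:
.vb
  PetscCall(MatSetOption(mat, MAT_USE_HASH_TABLE, PETSC_TRUE));
  PetscCall(MatMPIBAIJSetHashTableFactor(mat, 1.6));
.ve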
3365 
3366 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()`
3367 @*/
3368 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact)
3369 {
3370   PetscFunctionBegin;
3371   PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
3372   PetscFunctionReturn(PETSC_SUCCESS);
3373 }
3374 
3375 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact)
3376 {
3377   Mat_MPIBAIJ *baij;
3378 
3379   PetscFunctionBegin;
3380   baij          = (Mat_MPIBAIJ *)mat->data;
3381   baij->ht_fact = fact;
3382   PetscFunctionReturn(PETSC_SUCCESS);
3383 }
3384 
3385 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
3386 {
3387   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3388   PetscBool    flg;
3389 
3390   PetscFunctionBegin;
3391   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
3392   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
3393   if (Ad) *Ad = a->A;
3394   if (Ao) *Ao = a->B;
3395   if (colmap) *colmap = a->garray;
3396   PetscFunctionReturn(PETSC_SUCCESS);
3397 }
3398 
3399 /*
3400     Special version for direct calls from Fortran (to eliminate two function call overheads)
3401 */
3402 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3403   #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3404 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3405   #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3406 #endif
3407 
3408 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name
3409 /*@C
3410   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`
3411 
3412   Collective
3413 
3414   Input Parameters:
3415 + matin  - the matrix
3416 . min    - number of input rows
3417 . im     - input rows
3418 . nin    - number of input columns
3419 . in     - input columns
3420 . v      - numerical values input
3421 - addvin - `INSERT_VALUES` or `ADD_VALUES`
3422 
3423   Level: advanced
3424 
3425   Developer Notes:
3426   This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.
3427 
3428 .seealso: `Mat`, `MatSetValuesBlocked()`
3429 @*/
3430 PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
3431 {
3432   /* convert input arguments to C version */
3433   Mat        mat = *matin;
3434   PetscInt   m = *min, n = *nin;
3435   InsertMode addv = *addvin;
3436 
3437   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ *)mat->data;
3438   const MatScalar *value;
3439   MatScalar       *barray      = baij->barray;
3440   PetscBool        roworiented = baij->roworiented;
3441   PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
3442   PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
3443   PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
3444 
3445   PetscFunctionBegin;
3446   /* tasks normally handled by MatSetValuesBlocked() */
3447   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3448   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
3449   PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
3450   if (mat->assembled) {
3451     mat->was_assembled = PETSC_TRUE;
3452     mat->assembled     = PETSC_FALSE;
3453   }
3454   PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));
3455 
3456   if (!barray) {
3457     PetscCall(PetscMalloc1(bs2, &barray));
3458     baij->barray = barray;
3459   }
3460 
3461   if (roworiented) stepval = (n - 1) * bs;
3462   else stepval = (m - 1) * bs;
3463 
3464   for (i = 0; i < m; i++) {
3465     if (im[i] < 0) continue;
3466     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
3467     if (im[i] >= rstart && im[i] < rend) {
3468       row = im[i] - rstart;
3469       for (j = 0; j < n; j++) {
3470         /* If NumCol = 1 then a copy is not required */
3471         if ((roworiented) && (n == 1)) {
3472           barray = (MatScalar *)v + i * bs2;
3473         } else if ((!roworiented) && (m == 1)) {
3474           barray = (MatScalar *)v + j * bs2;
3475         } else { /* Here a copy is required */
3476           if (roworiented) {
3477             value = v + i * (stepval + bs) * bs + j * bs;
3478           } else {
3479             value = v + j * (stepval + bs) * bs + i * bs;
3480           }
3481           for (ii = 0; ii < bs; ii++, value += stepval) {
3482             for (jj = 0; jj < bs; jj++) *barray++ = *value++;
3483           }
3484           barray -= bs2;
3485         }
3486 
3487         if (in[j] >= cstart && in[j] < cend) {
3488           col = in[j] - cstart;
3489           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
3490         } else if (in[j] < 0) {
3491           continue;
3492         } else {
3493           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
3494           if (mat->was_assembled) {
3495             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
3496 
3497 #if defined(PETSC_USE_DEBUG)
3498   #if defined(PETSC_USE_CTABLE)
3499             {
3500               PetscInt data;
3501               PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
3502               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
3503             }
3504   #else
3505             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
3506   #endif
3507 #endif
3508 #if defined(PETSC_USE_CTABLE)
3509             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
3510             col = (col - 1) / bs;
3511 #else
3512             col = (baij->colmap[in[j]] - 1) / bs;
3513 #endif
3514             if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
3515               PetscCall(MatDisAssemble_MPIBAIJ(mat));
3516               col = in[j];
3517             }
3518           } else col = in[j];
3519           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
3520         }
3521       }
3522     } else {
3523       if (!baij->donotstash) {
3524         if (roworiented) {
3525           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3526         } else {
3527           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3528         }
3529       }
3530     }
3531   }
3532 
3533   /* task normally handled by MatSetValuesBlocked() */
3534   PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
3535   PetscFunctionReturn(PETSC_SUCCESS);
3536 }
3537 
3538 /*@
3539   MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain the matrix in standard block CSR format for the local rows.
3540 
3541   Collective
3542 
3543   Input Parameters:
3544 + comm - MPI communicator
3545 . bs   - the block size, only a block size of 1 is supported
3546 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
3547 . n    - number of local columns; this value should be the same as the local size used in creating the
3548          x vector for the matrix-vector product $ y = Ax $ (or `PETSC_DECIDE` to have it
3549          calculated if `N` is given). For square matrices `n` is almost always `m`.
3550 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
3551 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
3552 . i    - row indices; that is i[0] = 0 and i[row] = i[row-1] + the number of block elements in block row `row-1` of the matrix
3553 . j    - column indices
3554 - a    - matrix values
3555 
3556   Output Parameter:
3557 . mat - the matrix
3558 
3559   Level: intermediate
3560 
3561   Notes:
3562   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
3563   thus you CANNOT change the matrix entries by changing the values of a[] after you have
3564   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3565 
3566   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3567   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3568   block, followed by the second column of the first block, and so on.  That is, the blocks are contiguous in memory
3569   with column-major ordering within blocks.
3570 
3571   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
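
  Example Usage:
  A minimal sketch with the supported block size of 1; the local CSR triple `i`, `j`, `a` and the local sizes `m` and `n` are assumed valid:
.vb
  Mat A;
  PetscCall(MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD, 1, m, n, PETSC_DETERMINE, PETSC_DETERMINE, i, j, a, &A));
.ve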
3572 
3573 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
3574           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
3575 @*/
3576 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
3577 {
3578   PetscFunctionBegin;
3579   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3580   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
3581   PetscCall(MatCreate(comm, mat));
3582   PetscCall(MatSetSizes(*mat, m, n, M, N));
3583   PetscCall(MatSetType(*mat, MATMPIBAIJ));
3584   PetscCall(MatSetBlockSize(*mat, bs));
3585   PetscCall(MatSetUp(*mat));
3586   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
3587   PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
3588   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
3589   PetscFunctionReturn(PETSC_SUCCESS);
3590 }
3591 
3592 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3593 {
3594   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
3595   PetscInt    *indx;
3596   PetscScalar *values;
3597 
3598   PetscFunctionBegin;
3599   PetscCall(MatGetSize(inmat, &m, &N));
3600   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3601     Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data;
3602     PetscInt    *dnz, *onz, mbs, Nbs, nbs;
3603     PetscInt    *bindx, rmax = a->rmax, j;
3604     PetscMPIInt  rank, size;
3605 
3606     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3607     mbs = m / bs;
3608     Nbs = N / cbs;
3609     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
3610     nbs = n / cbs;
3611 
3612     PetscCall(PetscMalloc1(rmax, &bindx));
3613     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
3614 
3615     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3616     PetscCallMPI(MPI_Comm_size(comm, &size)); /* communicator size, compared against __end below */
3617     if (rank == size - 1) {
3618       /* Check sum(nbs) = Nbs */
3619       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
3620     }
3621 
3622     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
3623     for (i = 0; i < mbs; i++) {
3624       PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
3625       nnz = nnz / bs;
3626       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
3627       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
3628       PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL));
3629     }
3630     PetscCall(PetscFree(bindx));
3631 
3632     PetscCall(MatCreate(comm, outmat));
3633     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
3634     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
3635     PetscCall(MatSetType(*outmat, MATBAIJ));
3636     PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz));
3637     PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
3638     MatPreallocateEnd(dnz, onz);
3639     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3640   }
3641 
3642   /* numeric phase */
3643   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3644   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
3645 
3646   for (i = 0; i < m; i++) {
3647     PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3648     Ii = i + rstart;
3649     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
3650     PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3651   }
3652   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
3653   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
3654   PetscFunctionReturn(PETSC_SUCCESS);
3655 }
3656