/* xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 8564c97f3fe574910f676ffb31bf76fa44548916) */
#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I  "petscmat.h"  I*/

#include <petsc/private/hashseti.h>
#include <petscblaslapack.h>
#include <petscsf.h>

static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree2(baij->hd, baij->ht));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE BAIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE

#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
#endif

static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
  PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
  PetscScalar       *va, *vv;
  Vec                vB, vA;
  const PetscScalar *vb;

  PetscFunctionBegin;
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
  PetscCall(MatGetRowMaxAbs(a->A, vA, idx));

  PetscCall(VecGetArrayWrite(vA, &va));
  if (idx) {
    for (i = 0; i < m; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
  PetscCall(PetscMalloc1(m, &idxb));
  PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));

  PetscCall(VecGetArrayWrite(v, &vv));
  PetscCall(VecGetArrayRead(vB, &vb));
  for (i = 0; i < m; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      vv[i] = vb[i];
      if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
    } else {
      vv[i] = va[i];
      if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
    }
  }
  PetscCall(VecRestoreArrayWrite(v, &vv));
  PetscCall(VecRestoreArrayWrite(vA, &va));
  PetscCall(VecRestoreArrayRead(vB, &vb));
  PetscCall(PetscFree(idxb));
  PetscCall(VecDestroy(&vA));
  PetscCall(VecDestroy(&vB));
  PetscFunctionReturn(PETSC_SUCCESS);
}
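
/*
   Illustrative usage sketch (not part of this file) of the MatGetRowMaxAbs()
   interface implemented above; `A`, `m`, and the cleanup placement are
   hypothetical.

     Vec       rowmax;
     PetscInt *loc;
     PetscCall(MatCreateVecs(A, NULL, &rowmax));  // vector with the row layout of A
     PetscCall(PetscMalloc1(m, &loc));            // m = number of local rows
     PetscCall(MatGetRowMaxAbs(A, rowmax, loc));  // loc[i] = global column of row i's max
     PetscCall(PetscFree(loc));
     PetscCall(VecDestroy(&rowmax));
*/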

static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}
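
/*
   Illustrative sketch (not part of this file) of the store/retrieve pattern
   these two routines support, assuming the nonzero pattern is frozen first;
   the surrounding loop condition is hypothetical.

     PetscCall(MatSetOption(mat, MAT_NEW_NONZERO_LOCATIONS, PETSC_FALSE));
     PetscCall(MatStoreValues(mat));        // snapshot the numerical values
     while (refilling) {
       PetscCall(MatRetrieveValues(mat));   // restore the snapshot
       // ... MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() with the same pattern ...
     }
*/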

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     nbs = B->nbs, i, bs = mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap));
  for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1));
#else
  PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
  for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
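
/*
   Note: colmap values are stored shifted by one so that a lookup returning 0
   (the hash-map default, or a calloc'd array slot) means "global block column
   not present in B". A decode sketch mirroring the lookups used later in this
   file, with `gbcol` a hypothetical global block column index:

     PetscInt lcol;
  #if defined(PETSC_USE_CTABLE)
     PetscCall(PetscHMapIGetWithDefault(baij->colmap, gbcol + 1, 0, &lcol));
     lcol--;                         // lcol < 0 now means "absent"
  #else
     lcol = baij->colmap[gbcol] - 1;
  #endif
*/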

#define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = PetscSafePointerPlusOffset(aj, ai[brow]); \
    ap   = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  } while (0)

#define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = PetscSafePointerPlusOffset(bj, bi[brow]); \
    ap   = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  } while (0)
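
/*
   Note: in both macros above a bs x bs block is stored column-major, so
   scalar entry (ridx, cidx) of block _i lives at ap[bs2 * _i + bs * cidx + ridx].
   For example, with bs = 2 (bs2 = 4) the block

       [a b]
       [c d]

   is laid out in memory as {a, c, b, d}, and entry (ridx = 0, cidx = 1),
   i.e. b, sits at offset bs * 1 + 0 = 2 within the block.
*/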

PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  MatScalar    value;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)(A)->data;
  PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar   *aa = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j = 0; j < n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)(B)->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else {
              PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
              col += in[j] % bs;
            }
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
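
/*
   Illustrative caller-side sketch (not part of this file) for the routine
   above; indices are global scalar (not block) indices and the values are
   made up.

     PetscInt    row = 0, cols[2] = {0, 1};
     PetscScalar vals[2] = {1.0, 2.0};
     PetscCall(MatSetValues(mat, 1, &row, 2, cols, vals, INSERT_VALUES));
     PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
*/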

static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    This routine could be optimized by passing additional stride information
    into MatSetValuesBlocked_SeqBAIJ_Inlined() so that the block copy marked
    "Here a copy is required" below is not needed.
*/
static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ *)mat->data;
  const PetscScalar *value;
  MatScalar         *barray      = baij->barray;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If n == 1 (row-oriented) or m == 1 (column-oriented), a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i * (stepval + bs) + j) * bs;
          } else {
            value = v + (j * (stepval + bs) + i) * bs;
          }
          for (ii = 0; ii < bs; ii++, value += bs + stepval) {
            for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
  #endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = (col - 1) / bs;
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
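
/*
   Illustrative caller-side sketch (not part of this file) for the blocked
   interface above. With block size bs = 2, one block row/column index
   addresses a whole 2 x 2 block and the values array carries bs * bs entries
   per block (row-oriented by default).

     PetscInt    brow = 0, bcol = 1;
     PetscScalar vals[4] = {1.0, 2.0,   // first row of the block
                            3.0, 4.0};  // second row of the block
     PetscCall(MatSetValuesBlocked(mat, 1, &brow, 1, &bcol, vals, ADD_VALUES));
*/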

#define HASH_KEY             0.6180339887
#define HASH(size, key, tmp) (tmp = (key) * HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
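
/*
   Note: HASH() is multiplicative hashing with the golden-ratio constant. It
   keeps only the fractional part of key * 0.6180339887 and scales it into
   [0, size). For example, size = 100 and key = 7 give tmp = 4.3262...,
   fractional part 0.3262..., hence slot 32. Collisions are resolved by the
   linear probing in the routines below.
*/
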
static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
  PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
  PetscReal    tmp;
  MatScalar  **HD       = baij->hd, value;
  PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j = 0; j < n; j++) {
        col = in[j];
        if (roworiented) value = v[i * n + j];
        else value = v[i + j * m];
        /* Look up into the hash table */
        key = (row / bs) * Nbs + (col / bs) + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
        else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col;
  PetscInt           rstart = baij->rstartbs;
  PetscInt           rend = baij->rendbs, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
  PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
  PetscReal          tmp;
  MatScalar        **HD = baij->hd, *baij_a;
  const PetscScalar *v_t, *value;
  PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
      PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    }
    row = im[i];
    v_t = v + i * nbs2;
    if (row >= rstart && row < rend) {
      for (j = 0; j < n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row * Nbs + col + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          total_ct++;
          insert_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t += bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
            }
          }
        } else {
          value = v + j * (stepval + bs) * bs + i * bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
          data--;
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
  PetscReal    sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    PetscCall(MatNorm(baij->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v  = bmat->a;
      nz = bmat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = baij->garray, cstart = baij->rstartbs;
      PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N, &tmp2));
      v  = amat->a;
      jj = amat->j;
      for (i = 0; i < amat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * (cstart + *jj) + j; /* column index */
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      v  = bmat->a;
      jj = bmat->j;
      for (i = 0; i < bmat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * garray[*jj] + j;
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = 0.0;
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      PetscCall(PetscMalloc1(bs, &sums));
      sum = 0.0;
      for (j = 0; j < amat->mbs; j++) {
        for (row = 0; row < bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2 * amat->i[j];
        nz = amat->i[j + 1] - amat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        v  = bmat->a + bs2 * bmat->i[j];
        nz = bmat->i[j + 1] - bmat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        for (row = 0; row < bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscFree(sums));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Creates and fills the hash table. The table is created only once;
  if new entries need to be added to the matrix, the hash table has
  to be destroyed and recreated.
*/
static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          A = baij->A, B = baij->B;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
  PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
  PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
  PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
  PetscInt    *HT, key;
  MatScalar  **HD;
  PetscReal    tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct = 0, max = 0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS);

  baij->ht_size = (PetscInt)(factor * nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
  HD = baij->hd;
  HT = baij->ht;

  /* Loop Over A */
  for (i = 0; i < a->mbs; i++) {
    for (j = ai[i]; j < ai[i + 1]; j++) {
      row = i + rstart;
      col = aj[j] + cstart;

      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = a->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i = 0; i < b->mbs; i++) {
    for (j = bi[i]; j < bi[i + 1]; j++) {
      row = i + rstart;
      col = garray[bj[j]];
      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = b->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i = 0, j = 0; i < ht_size; i++) {
    if (HT[i]) j++;
  }
  PetscCall(PetscInfo(mat, "Average Search = %5.2g, max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max));
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
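
/*
   Illustrative sketch (not part of this file) of how a user opts in to this
   hash-table insertion path; the factor value is arbitrary:

     PetscCall(MatSetOption(mat, MAT_USE_HASH_TABLE, PETSC_TRUE));
     PetscCall(MatMPIBAIJSetHashTableFactor(mat, 1.99));

   or from the options database with -mat_use_hash_table <fact>.
*/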

static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     nstash, reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
  PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt    *row, *col;
  PetscBool    r1, r2, r3, other_disassembled;
  MatScalar   *val;
  PetscMPIInt  n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       temporarily clear the row-oriented flags so the values are interpreted
       column-oriented, and restore the original flags after MatSetValuesBlocked() */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented                           = PETSC_FALSE;
    a->roworiented                              = PETSC_FALSE;
    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE;
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented                           = r1;
    a->roworiented                              = r2;
    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3;
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
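
/*
   Illustrative sketch (not part of this file): the begin/end split above
   exists so a caller can overlap its own work with the stash communication;
   the intervening computation is hypothetical.

     PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
     // ... unrelated local work while stash messages are in flight ...
     PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
*/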

extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer);
#include <petscdraw.h>
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ      *baij = (Mat_MPIBAIJ *)mat->data;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, (double)info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqBAIJ *Aloc;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so we will assume MATMPIBAIJ is acceptable */
    /* Perhaps this should instead use the type of mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)A->data)->A, matname));
      PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)A->data)->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
  Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt        header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l;
  PetscInt64      nz, hnz;
  PetscInt       *rowlens, *colidxs;
  PetscScalar    *matvals;
  PetscMPIInt     rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs * bs * (A->nz + B->nz);

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (cnt = 0, i = 0; i < A->mbs; i++)
    for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
    }
  }
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}
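
/*
   Illustrative round-trip sketch (not part of this file): the stream written
   above (header, row lengths, column indices, values) is the standard PETSc
   binary format, so it can be read back with MatLoad(); `comm`, `mat`, `B`,
   and the file name are hypothetical.

     PetscViewer viewer;
     PetscCall(PetscViewerBinaryOpen(comm, "mat.dat", FILE_MODE_WRITE, &viewer));
     PetscCall(MatView(mat, viewer));
     PetscCall(PetscViewerDestroy(&viewer));

     PetscCall(PetscViewerBinaryOpen(comm, "mat.dat", FILE_MODE_READ, &viewer));
     PetscCall(MatCreate(comm, &B));
     PetscCall(MatSetType(B, MATMPIBAIJ));
     PetscCall(MatLoad(B, viewer));
     PetscCall(PetscViewerDestroy(&viewer));
*/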

PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii || isdraw || issocket) {
    PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

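/*
   y = A x: the scatter of the ghost values of x into a->lvec is started first,
   the purely local product with the diagonal block a->A is overlapped with
   that communication, and the off-diagonal contribution a->B * lvec is added
   once the scatter completes.
*/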
1217 static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
1218 {
1219   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1220   PetscInt     nt;
1221 
1222   PetscFunctionBegin;
1223   PetscCall(VecGetLocalSize(xx, &nt));
1224   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
1225   PetscCall(VecGetLocalSize(yy, &nt));
1226   PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy");
1227   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1228   PetscCall((*a->A->ops->mult)(a->A, xx, yy));
1229   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1230   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
1231   PetscFunctionReturn(PETSC_SUCCESS);
1232 }
1233 
1234 static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1235 {
1236   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1237 
1238   PetscFunctionBegin;
1239   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1240   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1241   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1242   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1243   PetscFunctionReturn(PETSC_SUCCESS);
1244 }
1245 
1246 static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
1247 {
1248   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1249 
1250   PetscFunctionBegin;
1251   /* do nondiagonal part */
1252   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1253   /* do local part */
1254   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1255   /* add partial results together */
1256   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1257   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1258   PetscFunctionReturn(PETSC_SUCCESS);
1259 }
1260 
1261 static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1262 {
1263   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1264 
1265   PetscFunctionBegin;
1266   /* do nondiagonal part */
1267   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1268   /* do local part */
1269   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1270   /* add partial results together */
1271   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1272   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1273   PetscFunctionReturn(PETSC_SUCCESS);
1274 }
1275 
1276 /*
1277   This only works correctly for square matrices where the subblock A->A is the
1278    diagonal block
1279 */
1280 static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
1281 {
1282   PetscFunctionBegin;
1283   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1284   PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
1285   PetscFunctionReturn(PETSC_SUCCESS);
1286 }
1287 
1288 static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
1289 {
1290   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1291 
1292   PetscFunctionBegin;
1293   PetscCall(MatScale(a->A, aa));
1294   PetscCall(MatScale(a->B, aa));
1295   PetscFunctionReturn(PETSC_SUCCESS);
1296 }
1297 
1298 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1299 {
1300   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
1301   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1302   PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
1303   PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
1304   PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;
1305 
1306   PetscFunctionBegin;
1307   PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
1308   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1309   mat->getrowactive = PETSC_TRUE;
1310 
1311   if (!mat->rowvalues && (idx || v)) {
1312     /*
1313         allocate enough space to hold information from the longest row.
1314     */
1315     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
1316     PetscInt     max = 1, mbs = mat->mbs, tmp;
1317     for (i = 0; i < mbs; i++) {
1318       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1319       if (max < tmp) max = tmp;
1320     }
1321     PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
1322   }
1323   lrow = row - brstart;
1324 
1325   pvA = &vworkA;
1326   pcA = &cworkA;
1327   pvB = &vworkB;
1328   pcB = &cworkB;
1329   if (!v) {
1330     pvA = NULL;
1331     pvB = NULL;
1332   }
1333   if (!idx) {
1334     pcA = NULL;
1335     if (!v) pcB = NULL;
1336   }
1337   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1338   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1339   nztot = nzA + nzB;
1340 
1341   cmap = mat->garray;
1342   if (v || idx) {
1343     if (nztot) {
1344       /* Sort by increasing column numbers, assuming A and B already sorted */
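      /*
         A hypothetical illustration (not taken from the source): with bs = 1 and
         cstart = 2, suppose the diagonal part A owns global columns 2..4 and the
         off-diagonal part B holds, in global numbering, columns {0, 1, 7} for
         this row. The first loop then stops at imark = 2 and the merged row is
         laid out as {B[0], B[1], A[0..nzA-1], B[2]}, i.e. with the globally
         increasing column set {0, 1, 2..4, 7}.
      */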
1345       PetscInt imark = -1;
1346       if (v) {
1347         *v = v_p = mat->rowvalues;
1348         for (i = 0; i < nzB; i++) {
1349           if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
1350           else break;
1351         }
1352         imark = i;
1353         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1354         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1355       }
1356       if (idx) {
1357         *idx = idx_p = mat->rowindices;
1358         if (imark > -1) {
1359           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1360         } else {
1361           for (i = 0; i < nzB; i++) {
1362             if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1363             else break;
1364           }
1365           imark = i;
1366         }
1367         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
1368         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1369       }
1370     } else {
1371       if (idx) *idx = NULL;
1372       if (v) *v = NULL;
1373     }
1374   }
1375   *nz = nztot;
1376   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1377   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1378   PetscFunctionReturn(PETSC_SUCCESS);
1379 }
1380 
1381 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1382 {
1383   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1384 
1385   PetscFunctionBegin;
1386   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
1387   baij->getrowactive = PETSC_FALSE;
1388   PetscFunctionReturn(PETSC_SUCCESS);
1389 }
1390 
1391 static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1392 {
1393   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1394 
1395   PetscFunctionBegin;
1396   PetscCall(MatZeroEntries(l->A));
1397   PetscCall(MatZeroEntries(l->B));
1398   PetscFunctionReturn(PETSC_SUCCESS);
1399 }
1400 
1401 static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1402 {
1403   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
1404   Mat            A = a->A, B = a->B;
1405   PetscLogDouble isend[5], irecv[5];
1406 
1407   PetscFunctionBegin;
1408   info->block_size = (PetscReal)matin->rmap->bs;
1409 
1410   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1411 
1412   isend[0] = info->nz_used;
1413   isend[1] = info->nz_allocated;
1414   isend[2] = info->nz_unneeded;
1415   isend[3] = info->memory;
1416   isend[4] = info->mallocs;
1417 
1418   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1419 
1420   isend[0] += info->nz_used;
1421   isend[1] += info->nz_allocated;
1422   isend[2] += info->nz_unneeded;
1423   isend[3] += info->memory;
1424   isend[4] += info->mallocs;
1425 
1426   if (flag == MAT_LOCAL) {
1427     info->nz_used      = isend[0];
1428     info->nz_allocated = isend[1];
1429     info->nz_unneeded  = isend[2];
1430     info->memory       = isend[3];
1431     info->mallocs      = isend[4];
1432   } else if (flag == MAT_GLOBAL_MAX) {
1433     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1434 
1435     info->nz_used      = irecv[0];
1436     info->nz_allocated = irecv[1];
1437     info->nz_unneeded  = irecv[2];
1438     info->memory       = irecv[3];
1439     info->mallocs      = irecv[4];
1440   } else if (flag == MAT_GLOBAL_SUM) {
1441     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1442 
1443     info->nz_used      = irecv[0];
1444     info->nz_allocated = irecv[1];
1445     info->nz_unneeded  = irecv[2];
1446     info->memory       = irecv[3];
1447     info->mallocs      = irecv[4];
1448   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1449   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1450   info->fill_ratio_needed = 0;
1451   info->factor_mallocs    = 0;
1452   PetscFunctionReturn(PETSC_SUCCESS);
1453 }
1454 
1455 static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
1456 {
1457   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1458 
1459   PetscFunctionBegin;
1460   switch (op) {
1461   case MAT_NEW_NONZERO_LOCATIONS:
1462   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1463   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1464   case MAT_KEEP_NONZERO_PATTERN:
1465   case MAT_NEW_NONZERO_LOCATION_ERR:
1466     MatCheckPreallocated(A, 1);
1467     PetscCall(MatSetOption(a->A, op, flg));
1468     PetscCall(MatSetOption(a->B, op, flg));
1469     break;
1470   case MAT_ROW_ORIENTED:
1471     MatCheckPreallocated(A, 1);
1472     a->roworiented = flg;
1473 
1474     PetscCall(MatSetOption(a->A, op, flg));
1475     PetscCall(MatSetOption(a->B, op, flg));
1476     break;
1477   case MAT_FORCE_DIAGONAL_ENTRIES:
1478   case MAT_SORTED_FULL:
1479     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1480     break;
1481   case MAT_IGNORE_OFF_PROC_ENTRIES:
1482     a->donotstash = flg;
1483     break;
1484   case MAT_USE_HASH_TABLE:
1485     a->ht_flag = flg;
1486     a->ht_fact = 1.39;
1487     break;
1488   case MAT_SYMMETRIC:
1489   case MAT_STRUCTURALLY_SYMMETRIC:
1490   case MAT_HERMITIAN:
1491   case MAT_SUBMAT_SINGLEIS:
1492   case MAT_SYMMETRY_ETERNAL:
1493   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1494   case MAT_SPD_ETERNAL:
    /* if the diagonal block is square it inherits some of the properties above */
1496     break;
1497   default:
1498     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op);
1499   }
1500   PetscFunctionReturn(PETSC_SUCCESS);
1501 }
1502 
1503 static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout)
1504 {
1505   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
1506   Mat_SeqBAIJ *Aloc;
1507   Mat          B;
1508   PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
1509   PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
1510   MatScalar   *a;
1511 
1512   PetscFunctionBegin;
1513   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1514   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1515     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1516     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1517     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1518     /* Do not know preallocation information, but must set block size */
1519     PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
1520   } else {
1521     B = *matout;
1522   }
1523 
1524   /* copy over the A part */
1525   Aloc = (Mat_SeqBAIJ *)baij->A->data;
1526   ai   = Aloc->i;
1527   aj   = Aloc->j;
1528   a    = Aloc->a;
1529   PetscCall(PetscMalloc1(bs, &rvals));
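  /* Each bs x bs block of a SeqBAIJ matrix is stored contiguously in column-major
     order, so the bs consecutive values at 'a' form one column of the block; that
     column becomes bs entries of row 'col' of the transpose, with column indices
     rvals spanning the original block row. */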
1530 
1531   for (i = 0; i < mbs; i++) {
1532     rvals[0] = bs * (baij->rstartbs + i);
1533     for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1534     for (j = ai[i]; j < ai[i + 1]; j++) {
1535       col = (baij->cstartbs + aj[j]) * bs;
1536       for (k = 0; k < bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
        col++;
1540         a += bs;
1541       }
1542     }
1543   }
1544   /* copy over the B part */
1545   Aloc = (Mat_SeqBAIJ *)baij->B->data;
1546   ai   = Aloc->i;
1547   aj   = Aloc->j;
1548   a    = Aloc->a;
1549   for (i = 0; i < mbs; i++) {
1550     rvals[0] = bs * (baij->rstartbs + i);
1551     for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1552     for (j = ai[i]; j < ai[i + 1]; j++) {
1553       col = baij->garray[aj[j]] * bs;
1554       for (k = 0; k < bs; k++) {
1555         PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
1556         col++;
1557         a += bs;
1558       }
1559     }
1560   }
1561   PetscCall(PetscFree(rvals));
1562   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1563   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1564 
1565   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
1566   else PetscCall(MatHeaderMerge(A, &B));
1567   PetscFunctionReturn(PETSC_SUCCESS);
1568 }
1569 
1570 static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr)
1571 {
1572   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1573   Mat          a = baij->A, b = baij->B;
1574   PetscInt     s1, s2, s3;
1575 
1576   PetscFunctionBegin;
1577   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1578   if (rr) {
1579     PetscCall(VecGetLocalSize(rr, &s1));
1580     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1581     /* Overlap communication with computation. */
1582     PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1583   }
1584   if (ll) {
1585     PetscCall(VecGetLocalSize(ll, &s1));
1586     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1587     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1588   }
  /* scale the diagonal block */
1590   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1591 
1592   if (rr) {
1593     /* Do a scatter end and then right scale the off-diagonal block */
1594     PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1595     PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1596   }
1597   PetscFunctionReturn(PETSC_SUCCESS);
1598 }
1599 
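/*
  A minimal usage sketch (hypothetical rows and values): zero global rows 0 and 5
  of a matrix A, place 1.0 on their diagonals, and adjust the right-hand side b so
  that a prescribed solution x is preserved in those rows:

    PetscInt rows[] = {0, 5};
    PetscCall(MatZeroRows(A, 2, rows, 1.0, x, b));
*/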
1600 static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
1601 {
1602   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1603   PetscInt    *lrows;
1604   PetscInt     r, len;
1605   PetscBool    cong;
1606 
1607   PetscFunctionBegin;
1608   /* get locally owned rows */
1609   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
1610   /* fix right-hand side if needed */
1611   if (x && b) {
1612     const PetscScalar *xx;
1613     PetscScalar       *bb;
1614 
1615     PetscCall(VecGetArrayRead(x, &xx));
1616     PetscCall(VecGetArray(b, &bb));
1617     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
1618     PetscCall(VecRestoreArrayRead(x, &xx));
1619     PetscCall(VecRestoreArray(b, &bb));
1620   }
1621 
  /*
     Zero the required rows. If the "diagonal block" of the matrix is square and
     the user wishes to set the diagonal, we use separate code so that
     MatSetValues() is not called for each diagonal entry, which would allocate
     new memory each time and slow things down with many mallocs.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1631   PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
1632   PetscCall(MatHasCongruentLayouts(A, &cong));
1633   if ((diag != 0.0) && cong) {
1634     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
1635   } else if (diag != 0.0) {
1636     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
1637     PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options MAT_NEW_NONZERO_LOCATIONS, MAT_NEW_NONZERO_LOCATION_ERR, and MAT_NEW_NONZERO_ALLOCATION_ERR");
1638     for (r = 0; r < len; ++r) {
1639       const PetscInt row = lrows[r] + A->rmap->rstart;
1640       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
1641     }
1642     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1643     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1644   } else {
1645     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
1646   }
1647   PetscCall(PetscFree(lrows));
1648 
1649   /* only change matrix nonzero state if pattern was allowed to be changed */
1650   if (!((Mat_SeqBAIJ *)l->A->data)->keepnonzeropattern || !((Mat_SeqBAIJ *)l->A->data)->nonew) {
1651     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1652     PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1653   }
1654   PetscFunctionReturn(PETSC_SUCCESS);
1655 }
1656 
1657 static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
1658 {
1659   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
1660   PetscMPIInt        n = A->rmap->n, p = 0;
1661   PetscInt           i, j, k, r, len = 0, row, col, count;
1662   PetscInt          *lrows, *owners = A->rmap->range;
1663   PetscSFNode       *rrows;
1664   PetscSF            sf;
1665   const PetscScalar *xx;
1666   PetscScalar       *bb, *mask;
1667   Vec                xmask, lmask;
1668   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
1669   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1670   PetscScalar       *aa;
1671 
1672   PetscFunctionBegin;
1673   /* Create SF where leaves are input rows and roots are owned rows */
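  /* Each process contributes every row index it was given as a leaf value; the
     MPI_LOR reduction folds those values onto the owning ranks. Roots reached by
     at least one leaf become 1 (true, since the initial -1 is also logically
     true), while untouched roots keep -1, so the test lrows[r] >= 0 below picks
     out exactly the owned rows that some process asked to zero. */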
1674   PetscCall(PetscMalloc1(n, &lrows));
1675   for (r = 0; r < n; ++r) lrows[r] = -1;
1676   PetscCall(PetscMalloc1(N, &rrows));
1677   for (r = 0; r < N; ++r) {
1678     const PetscInt idx = rows[r];
1679     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
1680     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
1681       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
1682     }
1683     rrows[r].rank  = p;
1684     rrows[r].index = rows[r] - owners[p];
1685   }
1686   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1687   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
1688   /* Collect flags for rows to be zeroed */
1689   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
1690   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
1691   PetscCall(PetscSFDestroy(&sf));
1692   /* Compress and put in row numbers */
1693   for (r = 0; r < n; ++r)
1694     if (lrows[r] >= 0) lrows[len++] = r;
1695   /* zero diagonal part of matrix */
1696   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
1697   /* handle off-diagonal part of matrix */
1698   PetscCall(MatCreateVecs(A, &xmask, NULL));
1699   PetscCall(VecDuplicate(l->lvec, &lmask));
1700   PetscCall(VecGetArray(xmask, &bb));
1701   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
1702   PetscCall(VecRestoreArray(xmask, &bb));
1703   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
1704   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
1705   PetscCall(VecDestroy(&xmask));
1706   if (x) {
1707     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
1708     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
1709     PetscCall(VecGetArrayRead(l->lvec, &xx));
1710     PetscCall(VecGetArray(b, &bb));
1711   }
1712   PetscCall(VecGetArray(lmask, &mask));
1713   /* remove zeroed rows of off-diagonal matrix */
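  /* with column-major bs x bs blocks, the entries of local point row 'row' sit at
     offset row % bs within each block of block row row / bs and are spaced bs
     apart, one entry per block column */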
1714   for (i = 0; i < len; ++i) {
1715     row   = lrows[i];
1716     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
1717     aa    = ((MatScalar *)baij->a) + baij->i[row / bs] * bs2 + (row % bs);
1718     for (k = 0; k < count; ++k) {
1719       aa[0] = 0.0;
1720       aa += bs;
1721     }
1722   }
  /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1724   for (i = 0; i < l->B->rmap->N; ++i) {
1725     row = i / bs;
1726     for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
1727       for (k = 0; k < bs; ++k) {
1728         col = bs * baij->j[j] + k;
1729         if (PetscAbsScalar(mask[col])) {
1730           aa = ((MatScalar *)baij->a) + j * bs2 + (i % bs) + bs * k;
1731           if (x) bb[i] -= aa[0] * xx[col];
1732           aa[0] = 0.0;
1733         }
1734       }
1735     }
1736   }
1737   if (x) {
1738     PetscCall(VecRestoreArray(b, &bb));
1739     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1740   }
1741   PetscCall(VecRestoreArray(lmask, &mask));
1742   PetscCall(VecDestroy(&lmask));
1743   PetscCall(PetscFree(lrows));
1744 
1745   /* only change matrix nonzero state if pattern was allowed to be changed */
1746   if (!((Mat_SeqBAIJ *)l->A->data)->nonew) {
1747     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1748     PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1749   }
1750   PetscFunctionReturn(PETSC_SUCCESS);
1751 }
1752 
1753 static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1754 {
1755   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1756 
1757   PetscFunctionBegin;
1758   PetscCall(MatSetUnfactored(a->A));
1759   PetscFunctionReturn(PETSC_SUCCESS);
1760 }
1761 
1762 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);
1763 
1764 static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag)
1765 {
1766   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
1767   Mat          a, b, c, d;
1768   PetscBool    flg;
1769 
1770   PetscFunctionBegin;
1771   a = matA->A;
1772   b = matA->B;
1773   c = matB->A;
1774   d = matB->B;
1775 
1776   PetscCall(MatEqual(a, c, &flg));
1777   if (flg) PetscCall(MatEqual(b, d, &flg));
1778   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1779   PetscFunctionReturn(PETSC_SUCCESS);
1780 }
1781 
1782 static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str)
1783 {
1784   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1785   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
1786 
1787   PetscFunctionBegin;
1788   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1789   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1790     PetscCall(MatCopy_Basic(A, B, str));
1791   } else {
1792     PetscCall(MatCopy(a->A, b->A, str));
1793     PetscCall(MatCopy(a->B, b->B, str));
1794   }
1795   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1796   PetscFunctionReturn(PETSC_SUCCESS);
1797 }
1798 
1799 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
1800 {
1801   PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs;
1802   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
1803   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
1804 
1805   PetscFunctionBegin;
1806   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
1807   PetscFunctionReturn(PETSC_SUCCESS);
1808 }
1809 
1810 static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
1811 {
1812   Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
1813   PetscBLASInt bnz, one                         = 1;
1814   Mat_SeqBAIJ *x, *y;
1815   PetscInt     bs2 = Y->rmap->bs * Y->rmap->bs;
1816 
1817   PetscFunctionBegin;
1818   if (str == SAME_NONZERO_PATTERN) {
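    /* identical nonzero patterns mean the stored values of X and Y line up entry
       for entry, so Y += a*X reduces to one BLAS axpy over the nz*bs2 values of
       the diagonal part and another over the off-diagonal part */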
1819     PetscScalar alpha = a;
1820     x                 = (Mat_SeqBAIJ *)xx->A->data;
1821     y                 = (Mat_SeqBAIJ *)yy->A->data;
1822     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
1823     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
1824     x = (Mat_SeqBAIJ *)xx->B->data;
1825     y = (Mat_SeqBAIJ *)yy->B->data;
1826     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
1827     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
1828     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1829   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1830     PetscCall(MatAXPY_Basic(Y, a, X, str));
1831   } else {
1832     Mat       B;
1833     PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
1834     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
1835     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
1836     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
1837     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
1838     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
1839     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
1840     PetscCall(MatSetType(B, MATMPIBAIJ));
1841     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d));
1842     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
1843     PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
1844     /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
1845     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
1846     PetscCall(MatHeaderMerge(Y, &B));
1847     PetscCall(PetscFree(nnz_d));
1848     PetscCall(PetscFree(nnz_o));
1849   }
1850   PetscFunctionReturn(PETSC_SUCCESS);
1851 }
1852 
1853 static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
1854 {
1855   PetscFunctionBegin;
1856   if (PetscDefined(USE_COMPLEX)) {
1857     Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;
1858 
1859     PetscCall(MatConjugate_SeqBAIJ(a->A));
1860     PetscCall(MatConjugate_SeqBAIJ(a->B));
1861   }
1862   PetscFunctionReturn(PETSC_SUCCESS);
1863 }
1864 
1865 static PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1866 {
1867   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1868 
1869   PetscFunctionBegin;
1870   PetscCall(MatRealPart(a->A));
1871   PetscCall(MatRealPart(a->B));
1872   PetscFunctionReturn(PETSC_SUCCESS);
1873 }
1874 
1875 static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1876 {
1877   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1878 
1879   PetscFunctionBegin;
1880   PetscCall(MatImaginaryPart(a->A));
1881   PetscCall(MatImaginaryPart(a->B));
1882   PetscFunctionReturn(PETSC_SUCCESS);
1883 }
1884 
1885 static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1886 {
1887   IS       iscol_local;
1888   PetscInt csize;
1889 
1890   PetscFunctionBegin;
1891   PetscCall(ISGetLocalSize(iscol, &csize));
1892   if (call == MAT_REUSE_MATRIX) {
1893     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1894     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1895   } else {
1896     PetscCall(ISAllGather(iscol, &iscol_local));
1897   }
1898   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE));
1899   if (call == MAT_INITIAL_MATRIX) {
1900     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1901     PetscCall(ISDestroy(&iscol_local));
1902   }
1903   PetscFunctionReturn(PETSC_SUCCESS);
1904 }
1905 
/*
  Not great since it makes two copies of the submatrix: first a SeqBAIJ on each
  process, and then the end result by concatenating the local matrices.
  Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
  This routine is used for both BAIJ and SBAIJ matrices (an unfortunate dependency).
*/
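/*
  A minimal reuse sketch (hypothetical index sets): the first call composes the
  gathered sequential submatrix with *newmat under the key "SubMatrix", so a
  second call with MAT_REUSE_MATRIX can refill the same object in place:

    Mat sub;
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &sub));
    // ... change the numerical values of mat, keeping the same nonzero pattern ...
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_REUSE_MATRIX, &sub));
*/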
1912 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym)
1913 {
1914   PetscMPIInt  rank, size;
1915   PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
1916   PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
1917   Mat          M, Mreuse;
1918   MatScalar   *vwork, *aa;
1919   MPI_Comm     comm;
1920   IS           isrow_new, iscol_new;
1921   Mat_SeqBAIJ *aij;
1922 
1923   PetscFunctionBegin;
1924   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
1925   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1926   PetscCallMPI(MPI_Comm_size(comm, &size));
  /* The compression and expansion should be avoided. It does not report
     errors and it might change the indices, hence it is buggy */
1929   PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
1930   if (isrow == iscol) {
1931     iscol_new = isrow_new;
1932     PetscCall(PetscObjectReference((PetscObject)iscol_new));
1933   } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));
1934 
1935   if (call == MAT_REUSE_MATRIX) {
1936     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
1937     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1938     PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym));
1939   } else {
1940     PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym));
1941   }
1942   PetscCall(ISDestroy(&isrow_new));
1943   PetscCall(ISDestroy(&iscol_new));
1944   /*
1945       m - number of local rows
1946       n - number of columns (same on all processors)
1947       rstart - first row in new global matrix generated
1948   */
1949   PetscCall(MatGetBlockSize(mat, &bs));
1950   PetscCall(MatGetSize(Mreuse, &m, &n));
1951   m = m / bs;
1952   n = n / bs;
1953 
1954   if (call == MAT_INITIAL_MATRIX) {
1955     aij = (Mat_SeqBAIJ *)(Mreuse)->data;
1956     ii  = aij->i;
1957     jj  = aij->j;
1958 
1959     /*
1960         Determine the number of non-zeros in the diagonal and off-diagonal
1961         portions of the matrix in order to do correct preallocation
1962     */
1963 
1964     /* first get start and end of "diagonal" columns */
1965     if (csize == PETSC_DECIDE) {
1966       PetscCall(ISGetSize(isrow, &mglobal));
1967       if (mglobal == n * bs) { /* square matrix */
1968         nlocal = m;
1969       } else {
1970         nlocal = n / size + ((n % size) > rank);
1971       }
1972     } else {
1973       nlocal = csize / bs;
1974     }
1975     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
1976     rstart = rend - nlocal;
1977     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
1978 
1979     /* next, compute all the lengths */
1980     PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
1981     for (i = 0; i < m; i++) {
1982       jend = ii[i + 1] - ii[i];
1983       olen = 0;
1984       dlen = 0;
1985       for (j = 0; j < jend; j++) {
1986         if (*jj < rstart || *jj >= rend) olen++;
1987         else dlen++;
1988         jj++;
1989       }
1990       olens[i] = olen;
1991       dlens[i] = dlen;
1992     }
1993     PetscCall(MatCreate(comm, &M));
1994     PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
1995     PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ));
1996     PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
1997     PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
1998     PetscCall(PetscFree2(dlens, olens));
1999   } else {
2000     PetscInt ml, nl;
2001 
2002     M = *newmat;
2003     PetscCall(MatGetLocalSize(M, &ml, &nl));
2004     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
2005     PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ()
       below directly, rather than the slower MatSetValues().
    */
2010     M->was_assembled = PETSC_TRUE;
2011     M->assembled     = PETSC_FALSE;
2012   }
2013   PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
2014   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
2015   aij = (Mat_SeqBAIJ *)(Mreuse)->data;
2016   ii  = aij->i;
2017   jj  = aij->j;
2018   aa  = aij->a;
2019   for (i = 0; i < m; i++) {
2020     row   = rstart / bs + i;
2021     nz    = ii[i + 1] - ii[i];
2022     cwork = jj;
2023     jj    = PetscSafePointerPlusOffset(jj, nz);
2024     vwork = aa;
2025     aa    = PetscSafePointerPlusOffset(aa, nz * bs * bs);
2026     PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
2027   }
2028 
2029   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
2030   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
2031   *newmat = M;
2032 
2033   /* save submatrix used in processor for next request */
2034   if (call == MAT_INITIAL_MATRIX) {
2035     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
2036     PetscCall(PetscObjectDereference((PetscObject)Mreuse));
2037   }
2038   PetscFunctionReturn(PETSC_SUCCESS);
2039 }
2040 
2041 static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B)
2042 {
2043   MPI_Comm        comm, pcomm;
2044   PetscInt        clocal_size, nrows;
2045   const PetscInt *rows;
2046   PetscMPIInt     size;
2047   IS              crowp, lcolp;
2048 
2049   PetscFunctionBegin;
2050   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2051   /* make a collective version of 'rowp' */
2052   PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
2053   if (pcomm == comm) {
2054     crowp = rowp;
2055   } else {
2056     PetscCall(ISGetSize(rowp, &nrows));
2057     PetscCall(ISGetIndices(rowp, &rows));
2058     PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
2059     PetscCall(ISRestoreIndices(rowp, &rows));
2060   }
2061   PetscCall(ISSetPermutation(crowp));
2062   /* make a local version of 'colp' */
2063   PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
2064   PetscCallMPI(MPI_Comm_size(pcomm, &size));
2065   if (size == 1) {
2066     lcolp = colp;
2067   } else {
2068     PetscCall(ISAllGather(colp, &lcolp));
2069   }
2070   PetscCall(ISSetPermutation(lcolp));
2071   /* now we just get the submatrix */
2072   PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
2073   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE));
2074   /* clean up */
2075   if (pcomm != comm) PetscCall(ISDestroy(&crowp));
2076   if (size > 1) PetscCall(ISDestroy(&lcolp));
2077   PetscFunctionReturn(PETSC_SUCCESS);
2078 }
2079 
2080 static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
2081 {
2082   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
2083   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
2084 
2085   PetscFunctionBegin;
2086   if (nghosts) *nghosts = B->nbs;
2087   if (ghosts) *ghosts = baij->garray;
2088   PetscFunctionReturn(PETSC_SUCCESS);
2089 }
2090 
2091 static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat)
2092 {
2093   Mat          B;
2094   Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
2095   Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
2096   Mat_SeqAIJ  *b;
2097   PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
2098   PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
2099   PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;
2100 
2101   PetscFunctionBegin;
2102   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2103   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2104 
2105   /*   Tell every processor the number of nonzeros per row  */
2106   PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
2107   for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
2108   PetscCall(PetscMalloc1(2 * size, &recvcounts));
2109   displs = recvcounts + size;
2110   for (i = 0; i < size; i++) {
2111     recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs;
2112     displs[i]     = A->rmap->range[i] / bs;
2113   }
2114   PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
2115   /* Create the sequential matrix of the same type as the local block diagonal  */
2116   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
2117   PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
2118   PetscCall(MatSetType(B, MATSEQAIJ));
2119   PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
2120   b = (Mat_SeqAIJ *)B->data;
2121 
2122   /*     Copy my part of matrix column indices over  */
2123   sendcount  = ad->nz + bd->nz;
2124   jsendbuf   = b->j + b->i[rstarts[rank] / bs];
2125   a_jsendbuf = ad->j;
2126   b_jsendbuf = bd->j;
2127   n          = A->rmap->rend / bs - A->rmap->rstart / bs;
2128   cnt        = 0;
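  /* garray[] is sorted by global block column, so for each block row the
     off-diagonal (bd) columns split into a part below and a part above the
     diagonal block; emitting B-below, then A shifted to global numbering, then
     B-above yields each row with globally increasing column indices */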
2129   for (i = 0; i < n; i++) {
2130     /* put in lower diagonal portion */
2131     m = bd->i[i + 1] - bd->i[i];
2132     while (m > 0) {
2133       /* is it above diagonal (in bd (compressed) numbering) */
2134       if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
2135       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2136       m--;
2137     }
2138 
2139     /* put in diagonal portion */
2140     for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;
2141 
2142     /* put in upper diagonal portion */
2143     while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
2144   }
2145   PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);
2146 
2147   /*  Gather all column indices to all processors  */
2148   for (i = 0; i < size; i++) {
2149     recvcounts[i] = 0;
2150     for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
2151   }
2152   displs[0] = 0;
2153   for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
2154   PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
2155   /*  Assemble the matrix into usable form (note numerical values not yet set)  */
2156   /* set the b->ilen (length of each row) values */
2157   PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
2158   /* set the b->i indices */
2159   b->i[0] = 0;
2160   for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
2161   PetscCall(PetscFree(lens));
2162   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2163   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2164   PetscCall(PetscFree(recvcounts));
2165 
2166   PetscCall(MatPropagateSymmetryOptions(A, B));
2167   *newmat = B;
2168   PetscFunctionReturn(PETSC_SUCCESS);
2169 }
2170 
2171 static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
2172 {
2173   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
2174   Vec          bb1 = NULL;
2175 
2176   PetscFunctionBegin;
2177   if (flag == SOR_APPLY_UPPER) {
2178     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2179     PetscFunctionReturn(PETSC_SUCCESS);
2180   }
2181 
2182   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));
2183 
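  /* The SOR_LOCAL_* variants below act like processor-block Jacobi with SOR as
     the local solver: each outer iteration scatters the current x into the ghost
     vector, folds the off-process couplings into the right-hand side via
     bb1 = bb - B*x, and then runs lits sweeps of SOR on the diagonal block. */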
2184   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2185     if (flag & SOR_ZERO_INITIAL_GUESS) {
2186       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2187       its--;
2188     }
2189 
2190     while (its--) {
2191       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2192       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2193 
2194       /* update rhs: bb1 = bb - B*x */
2195       PetscCall(VecScale(mat->lvec, -1.0));
2196       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2197 
2198       /* local sweep */
2199       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
2200     }
2201   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2202     if (flag & SOR_ZERO_INITIAL_GUESS) {
2203       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2204       its--;
2205     }
2206     while (its--) {
2207       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2208       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2209 
2210       /* update rhs: bb1 = bb - B*x */
2211       PetscCall(VecScale(mat->lvec, -1.0));
2212       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2213 
2214       /* local sweep */
2215       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
2216     }
2217   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2218     if (flag & SOR_ZERO_INITIAL_GUESS) {
2219       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2220       its--;
2221     }
2222     while (its--) {
2223       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2224       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2225 
2226       /* update rhs: bb1 = bb - B*x */
2227       PetscCall(VecScale(mat->lvec, -1.0));
2228       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2229 
2230       /* local sweep */
2231       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
2232     }
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Requested parallel SOR variant is not supported");
2234 
2235   PetscCall(VecDestroy(&bb1));
2236   PetscFunctionReturn(PETSC_SUCCESS);
2237 }
2238 
2239 static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions)
2240 {
2241   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data;
2242   PetscInt     m, N, i, *garray = aij->garray;
2243   PetscInt     ib, jb, bs = A->rmap->bs;
2244   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data;
2245   MatScalar   *a_val = a_aij->a;
2246   Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data;
2247   MatScalar   *b_val = b_aij->a;
2248   PetscReal   *work;
2249 
2250   PetscFunctionBegin;
2251   PetscCall(MatGetSize(A, &m, &N));
2252   PetscCall(PetscCalloc1(N, &work));
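  /* SeqBAIJ stores each bs x bs block contiguously in column-major order, so in
     the loops below the value pointers advance through the bs row entries (ib) of
     block column jb before moving to the next block column; the target work[]
     slot depends only on the block column index and jb */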
2253   if (type == NORM_2) {
2254     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2255       for (jb = 0; jb < bs; jb++) {
2256         for (ib = 0; ib < bs; ib++) {
2257           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2258           a_val++;
2259         }
2260       }
2261     }
2262     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2263       for (jb = 0; jb < bs; jb++) {
2264         for (ib = 0; ib < bs; ib++) {
2265           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2266           b_val++;
2267         }
2268       }
2269     }
2270   } else if (type == NORM_1) {
2271     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2272       for (jb = 0; jb < bs; jb++) {
2273         for (ib = 0; ib < bs; ib++) {
2274           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2275           a_val++;
2276         }
2277       }
2278     }
2279     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2280       for (jb = 0; jb < bs; jb++) {
2281         for (ib = 0; ib < bs; ib++) {
2282           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2283           b_val++;
2284         }
2285       }
2286     }
2287   } else if (type == NORM_INFINITY) {
2288     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2289       for (jb = 0; jb < bs; jb++) {
2290         for (ib = 0; ib < bs; ib++) {
          PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
          work[col]    = PetscMax(PetscAbsScalar(*a_val), work[col]);
2293           a_val++;
2294         }
2295       }
2296     }
2297     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2298       for (jb = 0; jb < bs; jb++) {
2299         for (ib = 0; ib < bs; ib++) {
          PetscInt col = garray[b_aij->j[i]] * bs + jb;
          work[col]    = PetscMax(PetscAbsScalar(*b_val), work[col]);
2302           b_val++;
2303         }
2304       }
2305     }
2306   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
2307     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2308       for (jb = 0; jb < bs; jb++) {
2309         for (ib = 0; ib < bs; ib++) {
2310           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
2311           a_val++;
2312         }
2313       }
2314     }
2315     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2316       for (jb = 0; jb < bs; jb++) {
2317         for (ib = 0; ib < bs; ib++) {
2318           work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
2319           b_val++;
2320         }
2321       }
2322     }
2323   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2324     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2325       for (jb = 0; jb < bs; jb++) {
2326         for (ib = 0; ib < bs; ib++) {
2327           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
2328           a_val++;
2329         }
2330       }
2331     }
2332     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2333       for (jb = 0; jb < bs; jb++) {
2334         for (ib = 0; ib < bs; ib++) {
2335           work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
2336           b_val++;
2337         }
2338       }
2339     }
2340   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
2341   if (type == NORM_INFINITY) {
2342     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
2343   } else {
2344     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
2345   }
2346   PetscCall(PetscFree(work));
2347   if (type == NORM_2) {
2348     for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2349   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2350     for (i = 0; i < N; i++) reductions[i] /= m;
2351   }
2352   PetscFunctionReturn(PETSC_SUCCESS);
2353 }
2354 
2355 static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values)
2356 {
2357   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2358 
2359   PetscFunctionBegin;
2360   PetscCall(MatInvertBlockDiagonal(a->A, values));
2361   A->factorerrortype             = a->A->factorerrortype;
2362   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2363   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2364   PetscFunctionReturn(PETSC_SUCCESS);
2365 }
2366 
2367 static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a)
2368 {
2369   Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
2370   Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ *)maij->A->data;
2371 
2372   PetscFunctionBegin;
2373   if (!Y->preallocated) {
2374     PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
2375   } else if (!aij->nz) {
2376     PetscInt nonew = aij->nonew;
2377     PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
2378     aij->nonew = nonew;
2379   }
2380   PetscCall(MatShift_Basic(Y, a));
2381   PetscFunctionReturn(PETSC_SUCCESS);
2382 }
2383 
2384 static PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d)
2385 {
2386   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2387 
2388   PetscFunctionBegin;
2389   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2390   PetscCall(MatMissingDiagonal(a->A, missing, d));
2391   if (d) {
2392     PetscInt rstart;
2393     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2394     *d += rstart / A->rmap->bs;
2395   }
2396   PetscFunctionReturn(PETSC_SUCCESS);
2397 }
2398 
2399 static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a)
2400 {
2401   PetscFunctionBegin;
2402   *a = ((Mat_MPIBAIJ *)A->data)->A;
2403   PetscFunctionReturn(PETSC_SUCCESS);
2404 }
2405 
2406 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep)
2407 {
2408   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2409 
2410   PetscFunctionBegin;
2411   PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2412   PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2413   PetscFunctionReturn(PETSC_SUCCESS);
2414 }
2415 
2416 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2417                                        MatGetRow_MPIBAIJ,
2418                                        MatRestoreRow_MPIBAIJ,
2419                                        MatMult_MPIBAIJ,
2420                                        /* 4*/ MatMultAdd_MPIBAIJ,
2421                                        MatMultTranspose_MPIBAIJ,
2422                                        MatMultTransposeAdd_MPIBAIJ,
2423                                        NULL,
2424                                        NULL,
2425                                        NULL,
2426                                        /*10*/ NULL,
2427                                        NULL,
2428                                        NULL,
2429                                        MatSOR_MPIBAIJ,
2430                                        MatTranspose_MPIBAIJ,
2431                                        /*15*/ MatGetInfo_MPIBAIJ,
2432                                        MatEqual_MPIBAIJ,
2433                                        MatGetDiagonal_MPIBAIJ,
2434                                        MatDiagonalScale_MPIBAIJ,
2435                                        MatNorm_MPIBAIJ,
2436                                        /*20*/ MatAssemblyBegin_MPIBAIJ,
2437                                        MatAssemblyEnd_MPIBAIJ,
2438                                        MatSetOption_MPIBAIJ,
2439                                        MatZeroEntries_MPIBAIJ,
2440                                        /*24*/ MatZeroRows_MPIBAIJ,
2441                                        NULL,
2442                                        NULL,
2443                                        NULL,
2444                                        NULL,
2445                                        /*29*/ MatSetUp_MPI_Hash,
2446                                        NULL,
2447                                        NULL,
2448                                        MatGetDiagonalBlock_MPIBAIJ,
2449                                        NULL,
2450                                        /*34*/ MatDuplicate_MPIBAIJ,
2451                                        NULL,
2452                                        NULL,
2453                                        NULL,
2454                                        NULL,
2455                                        /*39*/ MatAXPY_MPIBAIJ,
2456                                        MatCreateSubMatrices_MPIBAIJ,
2457                                        MatIncreaseOverlap_MPIBAIJ,
2458                                        MatGetValues_MPIBAIJ,
2459                                        MatCopy_MPIBAIJ,
2460                                        /*44*/ NULL,
2461                                        MatScale_MPIBAIJ,
2462                                        MatShift_MPIBAIJ,
2463                                        NULL,
2464                                        MatZeroRowsColumns_MPIBAIJ,
2465                                        /*49*/ NULL,
2466                                        NULL,
2467                                        NULL,
2468                                        NULL,
2469                                        NULL,
2470                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2471                                        NULL,
2472                                        MatSetUnfactored_MPIBAIJ,
2473                                        MatPermute_MPIBAIJ,
2474                                        MatSetValuesBlocked_MPIBAIJ,
2475                                        /*59*/ MatCreateSubMatrix_MPIBAIJ,
2476                                        MatDestroy_MPIBAIJ,
2477                                        MatView_MPIBAIJ,
2478                                        NULL,
2479                                        NULL,
2480                                        /*64*/ NULL,
2481                                        NULL,
2482                                        NULL,
2483                                        NULL,
2484                                        NULL,
2485                                        /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2486                                        NULL,
2487                                        NULL,
2488                                        NULL,
2489                                        NULL,
2490                                        /*74*/ NULL,
2491                                        MatFDColoringApply_BAIJ,
2492                                        NULL,
2493                                        NULL,
2494                                        NULL,
2495                                        /*79*/ NULL,
2496                                        NULL,
2497                                        NULL,
2498                                        NULL,
2499                                        MatLoad_MPIBAIJ,
2500                                        /*84*/ NULL,
2501                                        NULL,
2502                                        NULL,
2503                                        NULL,
2504                                        NULL,
2505                                        /*89*/ NULL,
2506                                        NULL,
2507                                        NULL,
2508                                        NULL,
2509                                        NULL,
2510                                        /*94*/ NULL,
2511                                        NULL,
2512                                        NULL,
2513                                        NULL,
2514                                        NULL,
2515                                        /*99*/ NULL,
2516                                        NULL,
2517                                        NULL,
2518                                        MatConjugate_MPIBAIJ,
2519                                        NULL,
2520                                        /*104*/ NULL,
2521                                        MatRealPart_MPIBAIJ,
2522                                        MatImaginaryPart_MPIBAIJ,
2523                                        NULL,
2524                                        NULL,
2525                                        /*109*/ NULL,
2526                                        NULL,
2527                                        NULL,
2528                                        NULL,
2529                                        MatMissingDiagonal_MPIBAIJ,
2530                                        /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ,
2531                                        NULL,
2532                                        MatGetGhosts_MPIBAIJ,
2533                                        NULL,
2534                                        NULL,
2535                                        /*119*/ NULL,
2536                                        NULL,
2537                                        NULL,
2538                                        NULL,
2539                                        MatGetMultiProcBlock_MPIBAIJ,
2540                                        /*124*/ NULL,
2541                                        MatGetColumnReductions_MPIBAIJ,
2542                                        MatInvertBlockDiagonal_MPIBAIJ,
2543                                        NULL,
2544                                        NULL,
2545                                        /*129*/ NULL,
2546                                        NULL,
2547                                        NULL,
2548                                        NULL,
2549                                        NULL,
2550                                        /*134*/ NULL,
2551                                        NULL,
2552                                        NULL,
2553                                        NULL,
2554                                        NULL,
2555                                        /*139*/ MatSetBlockSizes_Default,
2556                                        NULL,
2557                                        NULL,
2558                                        MatFDColoringSetUp_MPIXAIJ,
2559                                        NULL,
2560                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
2561                                        NULL,
2562                                        NULL,
2563                                        NULL,
2564                                        NULL,
2565                                        NULL,
2566                                        /*150*/ NULL,
2567                                        MatEliminateZeros_MPIBAIJ,
2568                                        NULL};
2569 
2570 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
2571 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
2572 
2573 static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
2574 {
2575   PetscInt        m, rstart, cstart, cend;
2576   PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
2577   const PetscInt *JJ          = NULL;
2578   PetscScalar    *values      = NULL;
2579   PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
2580   PetscBool       nooffprocentries;
2581 
2582   PetscFunctionBegin;
2583   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
2584   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
2585   PetscCall(PetscLayoutSetUp(B->rmap));
2586   PetscCall(PetscLayoutSetUp(B->cmap));
2587   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2588   m      = B->rmap->n / bs;
2589   rstart = B->rmap->rstart / bs;
2590   cstart = B->cmap->rstart / bs;
2591   cend   = B->cmap->rend / bs;
2592 
2593   PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
2594   PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
2595   for (i = 0; i < m; i++) {
2596     nz = ii[i + 1] - ii[i];
2597     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
2598     nz_max = PetscMax(nz_max, nz);
2599     dlen   = 0;
2600     olen   = 0;
2601     JJ     = jj + ii[i];
2602     for (j = 0; j < nz; j++) {
2603       if (*JJ < cstart || *JJ >= cend) olen++;
2604       else dlen++;
2605       JJ++;
2606     }
2607     d_nnz[i] = dlen;
2608     o_nnz[i] = olen;
2609   }
2610   PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
2611   PetscCall(PetscFree2(d_nnz, o_nnz));
2612 
2613   values = (PetscScalar *)V;
2614   if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
2615   for (i = 0; i < m; i++) {
2616     PetscInt        row   = i + rstart;
2617     PetscInt        ncols = ii[i + 1] - ii[i];
2618     const PetscInt *icols = jj + ii[i];
2619     if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2620       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
2621       PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
2622     } else { /* block ordering does not match so we can only insert one block at a time. */
2623       PetscInt j;
2624       for (j = 0; j < ncols; j++) {
2625         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
2626         PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
2627       }
2628     }
2629   }
2630 
2631   if (!V) PetscCall(PetscFree(values));
2632   nooffprocentries    = B->nooffprocentries;
2633   B->nooffprocentries = PETSC_TRUE;
2634   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2635   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2636   B->nooffprocentries = nooffprocentries;
2637 
2638   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2639   PetscFunctionReturn(PETSC_SUCCESS);
2640 }
2641 
2642 /*@C
2643   MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values
2644 
2645   Collective
2646 
2647   Input Parameters:
2648 + B  - the matrix
2649 . bs - the block size
2650 . i  - the indices into `j` for the start of each local row (starts with zero)
2651 . j  - the column indices for each local row (starts with zero); these must be sorted for each row
2652 - v  - optional values in the matrix, use `NULL` if not provided
2653 
2654   Level: advanced
2655 
2656   Notes:
2657   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
2658   thus you CANNOT change the matrix entries by changing the values of `v` after you have
2659   called this routine.
2660 
2661   The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
2662   may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
2663   over rows within a block and the last index is over columns within a block.  Fortran programs will likely set
2664   `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2665   block and the second index is over columns within a block.
2666 
2667   Though this routine has Preallocation() in the name, it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well.
2668 
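  Example Usage:
  A minimal sketch; the sizes, indices, and values below are illustrative (not taken from a PETSc example), and
  each process contributes two local block rows:
.vb
  Mat         B;
  PetscInt    bs   = 2;
  PetscInt    ii[] = {0, 1, 3};     // block row 0 holds 1 block, block row 1 holds 2 blocks
  PetscInt    jj[] = {0, 0, 1};     // sorted global block column indices for each block row
  PetscScalar vv[3 * 2 * 2] = {0};  // bs*bs values per block, ordered according to MAT_ROW_ORIENTED

  MatCreate(PETSC_COMM_WORLD, &B);
  MatSetSizes(B, 2 * bs, 2 * bs, PETSC_DETERMINE, PETSC_DETERMINE);
  MatSetType(B, MATMPIBAIJ);
  MatMPIBAIJSetPreallocationCSR(B, bs, ii, jj, vv);
.ve
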
2669 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ`
2670 @*/
2671 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2672 {
2673   PetscFunctionBegin;
2674   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2675   PetscValidType(B, 1);
2676   PetscValidLogicalCollectiveInt(B, bs, 2);
2677   PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
2678   PetscFunctionReturn(PETSC_SUCCESS);
2679 }
2680 
2681 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
2682 {
2683   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
2684   PetscInt     i;
2685   PetscMPIInt  size;
2686 
2687   PetscFunctionBegin;
2688   if (B->hash_active) {
2689     B->ops[0]      = b->cops;
2690     B->hash_active = PETSC_FALSE;
2691   }
2692   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
2693   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
2694   PetscCall(PetscLayoutSetUp(B->rmap));
2695   PetscCall(PetscLayoutSetUp(B->cmap));
2696   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2697 
2698   if (d_nnz) {
2699     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
2700   }
2701   if (o_nnz) {
2702     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
2703   }
2704 
2705   b->bs2 = bs * bs;
2706   b->mbs = B->rmap->n / bs;
2707   b->nbs = B->cmap->n / bs;
2708   b->Mbs = B->rmap->N / bs;
2709   b->Nbs = B->cmap->N / bs;
2710 
2711   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
2712   b->rstartbs = B->rmap->rstart / bs;
2713   b->rendbs   = B->rmap->rend / bs;
2714   b->cstartbs = B->cmap->rstart / bs;
2715   b->cendbs   = B->cmap->rend / bs;
2716 
2717 #if defined(PETSC_USE_CTABLE)
2718   PetscCall(PetscHMapIDestroy(&b->colmap));
2719 #else
2720   PetscCall(PetscFree(b->colmap));
2721 #endif
2722   PetscCall(PetscFree(b->garray));
2723   PetscCall(VecDestroy(&b->lvec));
2724   PetscCall(VecScatterDestroy(&b->Mvctx));
2725 
2726   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2727 
2728   MatSeqXAIJGetOptions_Private(b->B);
2729   PetscCall(MatDestroy(&b->B));
2730   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2731   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2732   PetscCall(MatSetType(b->B, MATSEQBAIJ));
2733   MatSeqXAIJRestoreOptions_Private(b->B);
2734 
2735   MatSeqXAIJGetOptions_Private(b->A);
2736   PetscCall(MatDestroy(&b->A));
2737   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2738   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2739   PetscCall(MatSetType(b->A, MATSEQBAIJ));
2740   MatSeqXAIJRestoreOptions_Private(b->A);
2741 
2742   PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
2743   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
2744   B->preallocated  = PETSC_TRUE;
2745   B->was_assembled = PETSC_FALSE;
2746   B->assembled     = PETSC_FALSE;
2747   PetscFunctionReturn(PETSC_SUCCESS);
2748 }
2749 
2750 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec);
2751 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal);
2752 
2753 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj)
2754 {
2755   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
2756   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data;
2757   PetscInt        M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs;
2758   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2759 
2760   PetscFunctionBegin;
2761   PetscCall(PetscMalloc1(M + 1, &ii));
2762   ii[0] = 0;
2763   for (i = 0; i < M; i++) {
2764     PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]);
2765     PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]);
2766     ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i];
2767     /* remove one from the count if this row contains a diagonal entry (the diagonal is not stored in the adjacency graph) */
2768     for (j = id[i]; j < id[i + 1]; j++) {
2769       if (jd[j] == i) {
2770         ii[i + 1]--;
2771         break;
2772       }
2773     }
2774   }
2775   PetscCall(PetscMalloc1(ii[M], &jj));
2776   cnt = 0;
2777   for (i = 0; i < M; i++) {
2778     for (j = io[i]; j < io[i + 1]; j++) {
2779       if (garray[jo[j]] > rstart) break;
2780       jj[cnt++] = garray[jo[j]];
2781     }
2782     for (k = id[i]; k < id[i + 1]; k++) {
2783       if (jd[k] != i) jj[cnt++] = rstart + jd[k];
2784     }
2785     for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]];
2786   }
2787   PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj));
2788   PetscFunctionReturn(PETSC_SUCCESS);
2789 }
2790 
2791 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2792 
2793 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
2794 
2795 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
2796 {
2797   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2798   Mat_MPIAIJ  *b;
2799   Mat          B;
2800 
2801   PetscFunctionBegin;
2802   PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");
2803 
2804   if (reuse == MAT_REUSE_MATRIX) {
2805     B = *newmat;
2806   } else {
2807     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
2808     PetscCall(MatSetType(B, MATMPIAIJ));
2809     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
2810     PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
2811     PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
2812     PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
2813   }
2814   b = (Mat_MPIAIJ *)B->data;
2815 
2816   if (reuse == MAT_REUSE_MATRIX) {
2817     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
2818     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
2819   } else {
2820     PetscInt   *garray = a->garray;
2821     Mat_SeqAIJ *bB;
2822     PetscInt    bs, nnz;
2823     PetscCall(MatDestroy(&b->A));
2824     PetscCall(MatDestroy(&b->B));
2825     /* just clear out the data structure */
2826     PetscCall(MatDisAssemble_MPIAIJ(B));
2827     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
2828     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));
2829 
2830     /* Global numbering for b->B columns */
2831     bB  = (Mat_SeqAIJ *)b->B->data;
2832     bs  = A->rmap->bs;
2833     nnz = bB->i[A->rmap->n];
2834     for (PetscInt k = 0; k < nnz; k++) {
2835       PetscInt bj = bB->j[k] / bs;
2836       PetscInt br = bB->j[k] % bs;
2837       bB->j[k]    = garray[bj] * bs + br;
2838     }
2839   }
2840   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2841   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2842 
2843   if (reuse == MAT_INPLACE_MATRIX) {
2844     PetscCall(MatHeaderReplace(A, &B));
2845   } else {
2846     *newmat = B;
2847   }
2848   PetscFunctionReturn(PETSC_SUCCESS);
2849 }
2850 
2851 /*MC
2852    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2853 
2854    Options Database Keys:
2855 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
2856 . -mat_block_size <bs> - set the blocksize used to store the matrix
2857 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use  (0 often indicates using BLAS)
2858 - -mat_use_hash_table <fact> - set hash table factor
2859 
2860    Level: beginner
2861 
2862    Note:
2863     `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. In this case no
2864     space is allocated for the nonzero entries, and any entries passed with `MatSetValues()` are ignored.
2865 
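   Example Usage:
   A sketch of selecting this type from the options database; m, n, M, N stand for the usual local and global
   sizes, and the program would be run with -mat_type mpibaij -mat_block_size 3:
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD, &A);
   MatSetSizes(A, m, n, M, N);
   MatSetFromOptions(A);
   // ... preallocate, set values, then MatAssemblyBegin()/MatAssemblyEnd() ...
.ve
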
2866 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ`
2867 M*/
2868 
2869 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);
2870 
2871 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2872 {
2873   Mat_MPIBAIJ *b;
2874   PetscBool    flg = PETSC_FALSE;
2875 
2876   PetscFunctionBegin;
2877   PetscCall(PetscNew(&b));
2878   B->data      = (void *)b;
2879   B->ops[0]    = MatOps_Values;
2880   B->assembled = PETSC_FALSE;
2881 
2882   B->insertmode = NOT_SET_VALUES;
2883   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
2884   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2885 
2886   /* build local table of row and column ownerships */
2887   PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));
2888 
2889   /* build cache for off array entries formed */
2890   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
2891 
2892   b->donotstash  = PETSC_FALSE;
2893   b->colmap      = NULL;
2894   b->garray      = NULL;
2895   b->roworiented = PETSC_TRUE;
2896 
2897   /* stuff used in block assembly */
2898   b->barray = NULL;
2899 
2900   /* stuff used for matrix vector multiply */
2901   b->lvec  = NULL;
2902   b->Mvctx = NULL;
2903 
2904   /* stuff for MatGetRow() */
2905   b->rowindices   = NULL;
2906   b->rowvalues    = NULL;
2907   b->getrowactive = PETSC_FALSE;
2908 
2909   /* hash table stuff */
2910   b->ht           = NULL;
2911   b->hd           = NULL;
2912   b->ht_size      = 0;
2913   b->ht_flag      = PETSC_FALSE;
2914   b->ht_fact      = 0;
2915   b->ht_total_ct  = 0;
2916   b->ht_insert_ct = 0;
2917 
2918   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2919   b->ijonly = PETSC_FALSE;
2920 
2921   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
2922   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
2923   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
2924 #if defined(PETSC_HAVE_HYPRE)
2925   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
2926 #endif
2927   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
2928   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
2929   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
2930   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
2931   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
2932   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
2933   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
2934   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));
2935 
2936   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
2937   PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
2938   if (flg) {
2939     PetscReal fact = 1.39;
2940     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
2941     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
2942     if (fact <= 1.0) fact = 1.39;
2943     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
2944     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
2945   }
2946   PetscOptionsEnd();
2947   PetscFunctionReturn(PETSC_SUCCESS);
2948 }
2949 
2950 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2951 /*MC
2952    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2953 
2954    This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
2955    and `MATMPIBAIJ` otherwise.
2956 
2957    Options Database Keys:
2958 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`
2959 
2960   Level: beginner
2961 
2962 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
2963 M*/
2964 
2965 /*@C
2966   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
2967   (block compressed row).
2968 
2969   Collective
2970 
2971   Input Parameters:
2972 + B     - the matrix
2973 . bs    - size of block; the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize, but the row
2974           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2975 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2976            submatrix  (same for all local rows)
2977 . d_nnz - array containing the number of block nonzeros in the various block rows
2978            in the diagonal portion of the local submatrix (possibly different for each block
2979            row) or `NULL`.  If you plan to factor the matrix you must leave room for the diagonal entry and
2980            set it even if it is zero.
2981 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2982            submatrix (same for all local rows).
2983 - o_nnz - array containing the number of nonzeros in the various block rows of the
2984            off-diagonal portion of the local submatrix (possibly different for
2985            each block row) or `NULL`.
2986 
2987    If the *_nnz parameter is given then the *_nz parameter is ignored
2988 
2989   Options Database Keys:
2990 + -mat_block_size            - size of the blocks to use
2991 - -mat_use_hash_table <fact> - set hash table factor
2992 
2993   Level: intermediate
2994 
2995   Notes:
2996   For good matrix assembly performance
2997   the user should preallocate the matrix storage by setting the parameters
2998   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
2999   performance can be increased by more than a factor of 50.
3000 
3001   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
3002   then it must be used on all processors that share the object for that argument.
3003 
3004   Storage Information:
3005   For a square global matrix we define each processor's diagonal portion
3006   to be its local rows and the corresponding columns (a square submatrix);
3007   each processor's off-diagonal portion encompasses the remainder of the
3008   local matrix (a rectangular submatrix).
3009 
3010   The user can specify preallocated storage for the diagonal part of
3011   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
3012   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3013   memory allocation.  Likewise, specify preallocated storage for the
3014   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3015 
3016   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3017   the figure below we depict these three local rows and all columns (0-11).
3018 
3019 .vb
3020            0 1 2 3 4 5 6 7 8 9 10 11
3021           --------------------------
3022    row 3  |o o o d d d o o o o  o  o
3023    row 4  |o o o d d d o o o o  o  o
3024    row 5  |o o o d d d o o o o  o  o
3025           --------------------------
3026 .ve
3027 
3028   Thus, any entries in the d locations are stored in the d (diagonal)
3029   submatrix, and any entries in the o locations are stored in the
3030   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3031   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3032 
3033   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3034   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3035   In general, for PDE problems in which most nonzeros are near the diagonal,
3036   one expects `d_nz` >> `o_nz`.
3037 
3038   You can call `MatGetInfo()` to get information on how effective the preallocation was;
3039   for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3040   You can also run with the option `-info` and look for messages with the string
3041   malloc in them to see if additional memory allocation was needed.
3042 
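  Example Usage:
  A sketch for a process that owns three block rows of a matrix `B` with block size `bs`; the counts are
  illustrative, not derived from the figure above:
.vb
  PetscInt d_nnz[] = {1, 2, 2};  // block nonzeros per block row in the diagonal portion
  PetscInt o_nnz[] = {2, 1, 0};  // block nonzeros per block row in the off-diagonal portion

  MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz);  // d_nz and o_nz are ignored when arrays are given
.ve
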
3043 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
3044 @*/
3045 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
3046 {
3047   PetscFunctionBegin;
3048   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3049   PetscValidType(B, 1);
3050   PetscValidLogicalCollectiveInt(B, bs, 2);
3051   PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
3052   PetscFunctionReturn(PETSC_SUCCESS);
3053 }
3054 
3055 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
3056 /*@C
3057   MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
3058   (block compressed row).
3059 
3060   Collective
3061 
3062   Input Parameters:
3063 + comm  - MPI communicator
3064 . bs    - size of block; the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize, but the row
3065           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3066 . m     - number of local rows (or `PETSC_DECIDE` to have it calculated if `M` is given)
3067            This value should be the same as the local size used in creating the
3068            y vector for the matrix-vector product y = Ax.
3069 . n     - number of local columns (or `PETSC_DECIDE` to have it calculated if `N` is given)
3070            This value should be the same as the local size used in creating the
3071            x vector for the matrix-vector product y = Ax.
3072 . M     - number of global rows (or `PETSC_DETERMINE` to have it calculated if `m` is given)
3073 . N     - number of global columns (or `PETSC_DETERMINE` to have it calculated if `n` is given)
3074 . d_nz  - number of nonzero blocks per block row in diagonal portion of local
3075            submatrix  (same for all local rows)
3076 . d_nnz - array containing the number of nonzero blocks in the various block rows
3077            in the diagonal portion of the local submatrix (possibly different for each block
3078            row) or `NULL`.  If you plan to factor the matrix you must leave room for the diagonal entry
3079            and set it even if it is zero.
3080 . o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3081            submatrix (same for all local rows).
3082 - o_nnz - array containing the number of nonzero blocks in the various block rows of the
3083            off-diagonal portion of the local submatrix (possibly different for
3084            each block row) or `NULL`.
3085 
3086   Output Parameter:
3087 . A - the matrix
3088 
3089   Options Database Keys:
3090 + -mat_block_size            - size of the blocks to use
3091 - -mat_use_hash_table <fact> - set hash table factor
3092 
3093   Level: intermediate
3094 
3095   Notes:
3096   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3097   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3098   [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
3099 
3100   For good matrix assembly performance
3101   the user should preallocate the matrix storage by setting the parameters
3102   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3103   performance can be increased by more than a factor of 50.
3104 
3105   If the *_nnz parameter is given then the *_nz parameter is ignored
3106 
3107   A nonzero block is any block that has 1 or more nonzeros in it
3108 
3109   The user MUST specify either the local or global matrix dimensions
3110   (possibly both).
3111 
3112   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
3113   then it must be used on all processors that share the object for that argument.
3114 
3115   Storage Information:
3116   For a square global matrix we define each processor's diagonal portion
3117   to be its local rows and the corresponding columns (a square submatrix);
3118   each processor's off-diagonal portion encompasses the remainder of the
3119   local matrix (a rectangular submatrix).
3120 
3121   The user can specify preallocated storage for the diagonal part of
3122   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
3123   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3124   memory allocation.  Likewise, specify preallocated storage for the
3125   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3126 
3127   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3128   the figure below we depict these three local rows and all columns (0-11).
3129 
3130 .vb
3131            0 1 2 3 4 5 6 7 8 9 10 11
3132           --------------------------
3133    row 3  |o o o d d d o o o o  o  o
3134    row 4  |o o o d d d o o o o  o  o
3135    row 5  |o o o d d d o o o o  o  o
3136           --------------------------
3137 .ve
3138 
3139   Thus, any entries in the d locations are stored in the d (diagonal)
3140   submatrix, and any entries in the o locations are stored in the
3141   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3142   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3143 
3144   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3145   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3146   In general, for PDE problems in which most nonzeros are near the diagonal,
3147   one expects `d_nz` >> `o_nz`.
3148 
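  Example Usage:
  A sketch that creates a square matrix with block size 2, letting PETSc choose the local sizes; `M` is an
  assumed global dimension and the uniform estimates 5 and 2 are illustrative:
.vb
  Mat A;
  MatCreateBAIJ(PETSC_COMM_WORLD, 2, PETSC_DECIDE, PETSC_DECIDE, M, M, 5, NULL, 2, NULL, &A);
  // ... MatSetValuesBlocked(), MatAssemblyBegin()/MatAssemblyEnd(), use A ...
  MatDestroy(&A);
.ve
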
3149 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
3150 @*/
3151 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
3152 {
3153   PetscMPIInt size;
3154 
3155   PetscFunctionBegin;
3156   PetscCall(MatCreate(comm, A));
3157   PetscCall(MatSetSizes(*A, m, n, M, N));
3158   PetscCallMPI(MPI_Comm_size(comm, &size));
3159   if (size > 1) {
3160     PetscCall(MatSetType(*A, MATMPIBAIJ));
3161     PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
3162   } else {
3163     PetscCall(MatSetType(*A, MATSEQBAIJ));
3164     PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
3165   }
3166   PetscFunctionReturn(PETSC_SUCCESS);
3167 }
3168 
3169 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
3170 {
3171   Mat          mat;
3172   Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
3173   PetscInt     len = 0;
3174 
3175   PetscFunctionBegin;
3176   *newmat = NULL;
3177   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
3178   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
3179   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
3180 
3181   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
3182   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
3183   if (matin->hash_active) {
3184     PetscCall(MatSetUp(mat));
3185   } else {
3186     mat->factortype   = matin->factortype;
3187     mat->preallocated = PETSC_TRUE;
3188     mat->assembled    = PETSC_TRUE;
3189     mat->insertmode   = NOT_SET_VALUES;
3190 
3191     a             = (Mat_MPIBAIJ *)mat->data;
3192     mat->rmap->bs = matin->rmap->bs;
3193     a->bs2        = oldmat->bs2;
3194     a->mbs        = oldmat->mbs;
3195     a->nbs        = oldmat->nbs;
3196     a->Mbs        = oldmat->Mbs;
3197     a->Nbs        = oldmat->Nbs;
3198 
3199     a->size         = oldmat->size;
3200     a->rank         = oldmat->rank;
3201     a->donotstash   = oldmat->donotstash;
3202     a->roworiented  = oldmat->roworiented;
3203     a->rowindices   = NULL;
3204     a->rowvalues    = NULL;
3205     a->getrowactive = PETSC_FALSE;
3206     a->barray       = NULL;
3207     a->rstartbs     = oldmat->rstartbs;
3208     a->rendbs       = oldmat->rendbs;
3209     a->cstartbs     = oldmat->cstartbs;
3210     a->cendbs       = oldmat->cendbs;
3211 
3212     /* hash table stuff */
3213     a->ht           = NULL;
3214     a->hd           = NULL;
3215     a->ht_size      = 0;
3216     a->ht_flag      = oldmat->ht_flag;
3217     a->ht_fact      = oldmat->ht_fact;
3218     a->ht_total_ct  = 0;
3219     a->ht_insert_ct = 0;
3220 
3221     PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
3222     if (oldmat->colmap) {
3223 #if defined(PETSC_USE_CTABLE)
3224       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3225 #else
3226       PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
3227       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
3228 #endif
3229     } else a->colmap = NULL;
3230 
3231     if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) {
3232       PetscCall(PetscMalloc1(len, &a->garray));
3233       PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3234     } else a->garray = NULL;
3235 
3236     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
3237     PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3238     PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
3239 
3240     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3241     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3242   }
3243   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3244   *newmat = mat;
3245   PetscFunctionReturn(PETSC_SUCCESS);
3246 }
3247 
3248 /* Used for both MPIBAIJ and MPISBAIJ matrices */
3249 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
3250 {
3251   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3252   PetscInt    *rowidxs, *colidxs, rs, cs, ce;
3253   PetscScalar *matvals;
3254 
3255   PetscFunctionBegin;
3256   PetscCall(PetscViewerSetUp(viewer));
3257 
3258   /* read in matrix header */
3259   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3260   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3261   M  = header[1];
3262   N  = header[2];
3263   nz = header[3];
3264   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3265   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3266   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");
3267 
3268   /* set block sizes from the viewer's .info file */
3269   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3270   /* set local sizes if not set already */
3271   if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
3272   if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
3273   /* set global sizes if not set already */
3274   if (mat->rmap->N < 0) mat->rmap->N = M;
3275   if (mat->cmap->N < 0) mat->cmap->N = N;
3276   PetscCall(PetscLayoutSetUp(mat->rmap));
3277   PetscCall(PetscLayoutSetUp(mat->cmap));
3278 
3279   /* check if the matrix sizes are correct */
3280   PetscCall(MatGetSize(mat, &rows, &cols));
3281   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3282   PetscCall(MatGetBlockSize(mat, &bs));
3283   PetscCall(MatGetLocalSize(mat, &m, &n));
3284   PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
3285   PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
3286   mbs = m / bs;
3287   nbs = n / bs;
3288 
3289   /* read in row lengths and build row indices */
3290   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3291   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3292   rowidxs[0] = 0;
3293   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3294   PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3295   PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3296 
3297   /* read in column indices and matrix values */
3298   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3299   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3300   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3301 
3302   {                /* preallocate matrix storage */
3303     PetscBT    bt; /* helper bit set to count diagonal nonzeros */
3304     PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
3305     PetscBool  sbaij, done;
3306     PetscInt  *d_nnz, *o_nnz;
3307 
3308     PetscCall(PetscBTCreate(nbs, &bt));
3309     PetscCall(PetscHSetICreate(&ht));
3310     PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
3311     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
3312     for (i = 0; i < mbs; i++) {
3313       PetscCall(PetscBTMemzero(nbs, bt));
3314       PetscCall(PetscHSetIClear(ht));
3315       for (k = 0; k < bs; k++) {
3316         PetscInt row = bs * i + k;
3317         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3318           PetscInt col = colidxs[j];
3319           if (!sbaij || col >= row) {
3320             if (col >= cs && col < ce) {
3321               if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
3322             } else {
3323               PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
3324               if (done) o_nnz[i]++;
3325             }
3326           }
3327         }
3328       }
3329     }
3330     PetscCall(PetscBTDestroy(&bt));
3331     PetscCall(PetscHSetIDestroy(&ht));
3332     PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
3333     PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
3334     PetscCall(PetscFree2(d_nnz, o_nnz));
3335   }
3336 
3337   /* store matrix values */
3338   for (i = 0; i < m; i++) {
3339     PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
3340     PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
3341   }
3342 
3343   PetscCall(PetscFree(rowidxs));
3344   PetscCall(PetscFree2(colidxs, matvals));
3345   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3346   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3347   PetscFunctionReturn(PETSC_SUCCESS);
3348 }
3349 
3350 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
3351 {
3352   PetscBool isbinary;
3353 
3354   PetscFunctionBegin;
3355   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3356   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3357   PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
3358   PetscFunctionReturn(PETSC_SUCCESS);
3359 }
3360 
3361 /*@
3362   MatMPIBAIJSetHashTableFactor - Sets the factor used to compute the size of the matrix's hash table
3363 
3364   Input Parameters:
3365 + mat  - the matrix
3366 - fact - factor
3367 
3368   Options Database Key:
3369 . -mat_use_hash_table <fact> - provide the factor
3370 
3371   Level: advanced
3372 
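  Example Usage:
  A sketch; the hash table is used only after being enabled with `MatSetOption()`, and the factor shown is
  illustrative:
.vb
  MatSetOption(mat, MAT_USE_HASH_TABLE, PETSC_TRUE);
  MatMPIBAIJSetHashTableFactor(mat, 1.6);
.ve
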
3373 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()`
3374 @*/
3375 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact)
3376 {
3377   PetscFunctionBegin;
3378   PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
3379   PetscFunctionReturn(PETSC_SUCCESS);
3380 }
3381 
3382 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact)
3383 {
3384   Mat_MPIBAIJ *baij;
3385 
3386   PetscFunctionBegin;
3387   baij          = (Mat_MPIBAIJ *)mat->data;
3388   baij->ht_fact = fact;
3389   PetscFunctionReturn(PETSC_SUCCESS);
3390 }
3391 
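/* Gives access to the pieces of a MATMPIBAIJ matrix: Ad is the on-process (diagonal) block, Ao the
   off-process (off-diagonal) block, and colmap maps the local block columns of Ao to global block columns */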
3392 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
3393 {
3394   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3395   PetscBool    flg;
3396 
3397   PetscFunctionBegin;
3398   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
3399   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
3400   if (Ad) *Ad = a->A;
3401   if (Ao) *Ao = a->B;
3402   if (colmap) *colmap = a->garray;
3403   PetscFunctionReturn(PETSC_SUCCESS);
3404 }
3405 
3406 /*
3407     Special version for direct calls from Fortran (to eliminate two function call overheads)
3408 */
3409 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3410   #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3411 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3412   #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3413 #endif
3414 
3415 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name
3416 /*@C
3417   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`
3418 
3419   Collective
3420 
3421   Input Parameters:
3422 + matin  - the matrix
3423 . min    - number of input rows
3424 . im     - input rows
3425 . nin    - number of input columns
3426 . in     - input columns
3427 . v      - numerical values input
3428 - addvin - `INSERT_VALUES` or `ADD_VALUES`
3429 
3430   Level: advanced
3431 
3432   Developer Notes:
3433   This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.
3434 
3435 .seealso: `Mat`, `MatSetValuesBlocked()`
3436 @*/
3437 PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
3438 {
3439   /* convert input arguments to C version */
3440   Mat        mat = *matin;
3441   PetscInt   m = *min, n = *nin;
3442   InsertMode addv = *addvin;
3443 
3444   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ *)mat->data;
3445   const MatScalar *value;
3446   MatScalar       *barray      = baij->barray;
3447   PetscBool        roworiented = baij->roworiented;
3448   PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
3449   PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
3450   PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
3451 
3452   PetscFunctionBegin;
3453   /* tasks normally handled by MatSetValuesBlocked() */
3454   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3455   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
3456   PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
3457   if (mat->assembled) {
3458     mat->was_assembled = PETSC_TRUE;
3459     mat->assembled     = PETSC_FALSE;
3460   }
3461   PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));
3462 
3463   if (!barray) {
3464     PetscCall(PetscMalloc1(bs2, &barray));
3465     baij->barray = barray;
3466   }
3467 
3468   if (roworiented) stepval = (n - 1) * bs;
3469   else stepval = (m - 1) * bs;
3470 
3471   for (i = 0; i < m; i++) {
3472     if (im[i] < 0) continue;
3473     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
3474     if (im[i] >= rstart && im[i] < rend) {
3475       row = im[i] - rstart;
3476       for (j = 0; j < n; j++) {
3477         /* If NumCol = 1 then a copy is not required */
3478         if ((roworiented) && (n == 1)) {
3479           barray = (MatScalar *)v + i * bs2;
3480         } else if ((!roworiented) && (m == 1)) {
3481           barray = (MatScalar *)v + j * bs2;
3482         } else { /* Here a copy is required */
3483           if (roworiented) {
3484             value = v + i * (stepval + bs) * bs + j * bs;
3485           } else {
3486             value = v + j * (stepval + bs) * bs + i * bs;
3487           }
3488           for (ii = 0; ii < bs; ii++, value += stepval) {
3489             for (jj = 0; jj < bs; jj++) *barray++ = *value++;
3490           }
3491           barray -= bs2;
3492         }
3493 
3494         if (in[j] >= cstart && in[j] < cend) {
3495           col = in[j] - cstart;
3496           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
3497         } else if (in[j] < 0) {
3498           continue;
3499         } else {
3500           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
3501           if (mat->was_assembled) {
3502             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
3503 
3504 #if defined(PETSC_USE_DEBUG)
3505   #if defined(PETSC_USE_CTABLE)
3506             {
3507               PetscInt data;
3508               PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
3509               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
3510             }
3511   #else
3512             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
3513   #endif
3514 #endif
3515 #if defined(PETSC_USE_CTABLE)
3516             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
3517             col = (col - 1) / bs;
3518 #else
3519             col = (baij->colmap[in[j]] - 1) / bs;
3520 #endif
3521             if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
3522               PetscCall(MatDisAssemble_MPIBAIJ(mat));
3523               col = in[j];
3524             }
3525           } else col = in[j];
3526           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
3527         }
3528       }
3529     } else {
3530       if (!baij->donotstash) {
3531         if (roworiented) {
3532           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3533         } else {
3534           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3535         }
3536       }
3537     }
3538   }
3539 
3540   /* task normally handled by MatSetValuesBlocked() */
3541   PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
3542   PetscFunctionReturn(PETSC_SUCCESS);
3543 }
3544 
3545 /*@
3546   MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain the local rows in standard block CSR format.
3547 
3548   Collective
3549 
3550   Input Parameters:
3551 + comm - MPI communicator
3552 . bs   - the block size; only a block size of 1 is supported
3553 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
3554 . n    - This value should be the same as the local size used in creating the
3555          x vector for the matrix-vector product $ y = Ax $ (or `PETSC_DECIDE` to have it
3556          calculated if `N` is given). For square matrices `n` is almost always `m`.
3557 . M    - number of global rows (or `PETSC_DETERMINE` to have it calculated if `m` is given)
3558 . N    - number of global columns (or `PETSC_DETERMINE` to have it calculated if `n` is given)
3559 . i    - row indices; that is, i[0] = 0 and i[row] = i[row-1] + the number of blocks in block row row-1 of the matrix
3560 . j    - column indices
3561 - a    - matrix values
3562 
3563   Output Parameter:
3564 . mat - the matrix
3565 
3566   Level: intermediate
3567 
3568   Notes:
3569   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
3570   thus you CANNOT change the matrix entries by changing the values of a[] after you have
3571   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3572 
3573   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3574   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3575   block, followed by the second column of the first block, and so on.  That is, the blocks are contiguous in memory
3576   with column-major ordering within blocks.
3577 
3578   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
3579 
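  Example Usage:
  A sketch with bs = 1 and two local rows on every process; the global column indices in `j` are illustrative:
.vb
  PetscInt    i[] = {0, 2, 3};
  PetscInt    j[] = {0, 1, 1};
  PetscScalar a[] = {2.0, -1.0, 3.0};
  Mat         A;

  MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD, 1, 2, 2, PETSC_DETERMINE, PETSC_DETERMINE, i, j, a, &A);
.ve
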
3580 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
3581           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
3582 @*/
3583 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
3584 {
3585   PetscFunctionBegin;
3586   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3587   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
3588   PetscCall(MatCreate(comm, mat));
3589   PetscCall(MatSetSizes(*mat, m, n, M, N));
3590   PetscCall(MatSetType(*mat, MATMPIBAIJ));
3591   PetscCall(MatSetBlockSize(*mat, bs));
3592   PetscCall(MatSetUp(*mat));
3593   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
3594   PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
3595   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
3596   PetscFunctionReturn(PETSC_SUCCESS);
3597 }
3598 
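/* Stacks the sequential BAIJ matrix inmat from every rank, in rank order, into a single parallel matrix;
   n is the number of local columns of the result (or PETSC_DECIDE) */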
3599 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3600 {
3601   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
3602   PetscInt    *indx;
3603   PetscScalar *values;
3604 
3605   PetscFunctionBegin;
3606   PetscCall(MatGetSize(inmat, &m, &N));
3607   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3608     Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data;
3609     PetscInt    *dnz, *onz, mbs, Nbs, nbs;
3610     PetscInt    *bindx, rmax = a->rmax, j;
3611     PetscMPIInt  rank, size;
3612 
3613     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3614     mbs = m / bs;
3615     Nbs = N / cbs;
3616     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
3617     nbs = n / cbs;
3618 
3619     PetscCall(PetscMalloc1(rmax, &bindx));
3620     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
3621 
3622     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3623     PetscCallMPI(MPI_Comm_size(comm, &size));
3624     if (rank == size - 1) {
3625       /* Check sum(nbs) = Nbs */
3626       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
3627     }
3628 
3629     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
3630     for (i = 0; i < mbs; i++) {
3631       PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
3632       nnz = nnz / bs;
3633       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
3634       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
3635       PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL));
3636     }
3637     PetscCall(PetscFree(bindx));
3638 
3639     PetscCall(MatCreate(comm, outmat));
3640     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
3641     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
3642     PetscCall(MatSetType(*outmat, MATBAIJ));
3643     PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz));
3644     PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
3645     MatPreallocateEnd(dnz, onz);
3646     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3647   }
3648 
3649   /* numeric phase */
3650   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3651   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
3652 
3653   for (i = 0; i < m; i++) {
3654     PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3655     Ii = i + rstart;
3656     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
3657     PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3658   }
3659   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
3660   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
3661   PetscFunctionReturn(PETSC_SUCCESS);
3662 }
3663