xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision d9acb416d05abeed0a33bde3a81aeb2ea0364f6a)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I  "petscmat.h"  I*/
2 
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  /* Record the final dimensions for object-state logging before teardown */
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  /* Free the scalar and blocked stashes used for off-process MatSetValues() entries */
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  /* Destroy the local diagonal (A) and off-diagonal (B) sequential BAIJ blocks */
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
  /* colmap is a hash map when CTABLE is enabled, otherwise a dense array of length Nbs */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  /* barray is the bs*bs scratch block allocated lazily by MatSetValuesBlocked_MPIBAIJ() */
  PetscCall(PetscFree(baij->barray));
  /* hd/ht back the hash-table insertion path (MatSetValues_MPIBAIJ_HT) */
  PetscCall(PetscFree2(baij->hd, baij->ht));
  PetscCall(PetscFree(baij->rangebs));
  /* Must come last among the frees above: all those members live inside this struct */
  PetscCall(PetscFree(mat->data));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  /* Detach all composed methods so a subsequent MatSetType() starts clean */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
47 
48 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and  MatAssemblyEnd_MPI_Hash() */
49 #define TYPE BAIJ
50 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
51 #undef TYPE
52 
53 #if defined(PETSC_HAVE_HYPRE)
54 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
55 #endif
56 
57 static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[])
58 {
59   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
60   PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
61   PetscScalar       *va, *vv;
62   Vec                vB, vA;
63   const PetscScalar *vb;
64 
65   PetscFunctionBegin;
66   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
67   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
68 
69   PetscCall(VecGetArrayWrite(vA, &va));
70   if (idx) {
71     for (i = 0; i < m; i++) {
72       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
73     }
74   }
75 
76   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
77   PetscCall(PetscMalloc1(m, &idxb));
78   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
79 
80   PetscCall(VecGetArrayWrite(v, &vv));
81   PetscCall(VecGetArrayRead(vB, &vb));
82   for (i = 0; i < m; i++) {
83     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
84       vv[i] = vb[i];
85       if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
86     } else {
87       vv[i] = va[i];
88       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
89     }
90   }
91   PetscCall(VecRestoreArrayWrite(vA, &vv));
92   PetscCall(VecRestoreArrayWrite(vA, &va));
93   PetscCall(VecRestoreArrayRead(vB, &vb));
94   PetscCall(PetscFree(idxb));
95   PetscCall(VecDestroy(&vA));
96   PetscCall(VecDestroy(&vB));
97   PetscFunctionReturn(PETSC_SUCCESS);
98 }
99 
100 static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
101 {
102   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
103 
104   PetscFunctionBegin;
105   PetscCall(MatStoreValues(aij->A));
106   PetscCall(MatStoreValues(aij->B));
107   PetscFunctionReturn(PETSC_SUCCESS);
108 }
109 
110 static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
111 {
112   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
113 
114   PetscFunctionBegin;
115   PetscCall(MatRetrieveValues(aij->A));
116   PetscCall(MatRetrieveValues(aij->B));
117   PetscFunctionReturn(PETSC_SUCCESS);
118 }
119 
120 /*
121      Local utility routine that creates a mapping from the global column
122    number to the local number in the off-diagonal part of the local
123    storage of the matrix.  This is done in a non scalable way since the
124    length of colmap equals the global matrix length.
125 */
126 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
127 {
128   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
129   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
130   PetscInt     nbs = B->nbs, i, bs = mat->rmap->bs;
131 
132   PetscFunctionBegin;
133 #if defined(PETSC_USE_CTABLE)
134   PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap));
135   for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1));
136 #else
137   PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
138   for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
139 #endif
140   PetscFunctionReturn(PETSC_SUCCESS);
141 }
142 
/*
   Insert or add one scalar at point position (row, col) of the local diagonal
   block A.  row/col are process-local point indices; orow/ocol are the global
   indices used only in error messages.  The block row is located, then a
   binary search narrows to <= 3 candidates and a linear scan finds the block
   column.  If the block column is absent, a new zeroed bs*bs block is inserted
   (reallocating via MatSeqXAIJReallocateAIJ when needed) unless a->nonew
   forbids new nonzeros (1 = silently drop, -1 = error).
   Relies on many locals declared in the enclosing function (rp, ap, bap, low,
   high, t, _i, N, brow, bcol, ridx, cidx, nrow, rmax, bs, bs2, a, aa, ai, aj,
   aimax, ailen, A).
*/
#define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = PetscSafePointerPlusOffset(aj, ai[brow]); \
    ap   = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  } while (0)
182 
/*
   Mirror of MatSetValues_SeqBAIJ_A_Private for the local off-diagonal block B.
   Identical search/insert logic but reads B's CSR arrays (bi, bj, ba, bimax,
   bilen) and honors b->nonew.  Uses the b_noinsert label so both macros can be
   expanded in the same function body.
*/
#define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = PetscSafePointerPlusOffset(bj, bi[brow]); \
    ap   = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column  (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  } while (0)
222 
/*
   MatSetValues_MPIBAIJ - Insert or add an m x n dense logically-indexed patch
   of scalar values using GLOBAL point (not block) indices.  Locally owned rows
   go straight into the diagonal block A or off-diagonal block B via the
   macros above; off-process rows are stashed for communication at assembly.
   Negative row/column indices are silently skipped.
*/
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  MatScalar    value;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)(A)->data;
  PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar   *aa = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba = b->a;

  /* scratch shared with MatSetValues_SeqBAIJ_{A,B}_Private */
  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are ignored by convention */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      /* locally owned row: convert to local point numbering */
      row = im[i] - rstart_orig;
      for (j = 0; j < n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          /* column in the diagonal block */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            /* B is in compacted (local) column numbering: translate through colmap */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
              /* column not present in B: expand B back to global numbering so it can grow */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)(B)->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else {
              PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
              col += in[j] % bs; /* colmap gives the block offset; add the point offset within it */
            }
          } else col = in[j]; /* before first assembly B still uses global column numbering */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else {
      /* off-process row: stash for the communication phase of MatAssemblyBegin/End */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
308 
/*
   MatSetValuesBlocked_SeqBAIJ_Inlined - Insert or add one bs x bs block of
   values v at block position (row, col) of a sequential BAIJ matrix.
   row/col are local block indices; orow/ocol are the global block indices
   used only in error messages.  Storage inside each block is column-major
   (column stride bs), so row-oriented input is transposed on the fly.
*/
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];       /* block-column indices of this block row */
  ap    = aa + bs2 * ai[row]; /* values of this block row, bs2 per block */
  rmax  = imax[row];          /* allocated blocks in this row */
  nrow  = ailen[row];         /* occupied blocks in this row */
  value = v;
  low   = 0;
  high  = nrow;
  /* binary search narrows to <= 7 candidates, then linear scan */
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* block already present: accumulate or overwrite in place */
      bap = ap + bs2 * i;
      if (roworiented) {
        /* input is row-major: scatter each input row into the column-major block */
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        /* input is column-major: copy column by column */
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  /* block not found: insert a new one unless nonew forbids it (1 = drop, -1 = error) */
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  /* fill the freshly inserted block (no ADD/INSERT distinction needed: it starts empty) */
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}
385 
386 /*
387     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
388     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
389 */
390 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
391 {
392   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ *)mat->data;
393   const PetscScalar *value;
394   MatScalar         *barray      = baij->barray;
395   PetscBool          roworiented = baij->roworiented;
396   PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
397   PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
398   PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
399 
400   PetscFunctionBegin;
401   if (!barray) {
402     PetscCall(PetscMalloc1(bs2, &barray));
403     baij->barray = barray;
404   }
405 
406   if (roworiented) stepval = (n - 1) * bs;
407   else stepval = (m - 1) * bs;
408 
409   for (i = 0; i < m; i++) {
410     if (im[i] < 0) continue;
411     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
412     if (im[i] >= rstart && im[i] < rend) {
413       row = im[i] - rstart;
414       for (j = 0; j < n; j++) {
415         /* If NumCol = 1 then a copy is not required */
416         if ((roworiented) && (n == 1)) {
417           barray = (MatScalar *)v + i * bs2;
418         } else if ((!roworiented) && (m == 1)) {
419           barray = (MatScalar *)v + j * bs2;
420         } else { /* Here a copy is required */
421           if (roworiented) {
422             value = v + (i * (stepval + bs) + j) * bs;
423           } else {
424             value = v + (j * (stepval + bs) + i) * bs;
425           }
426           for (ii = 0; ii < bs; ii++, value += bs + stepval) {
427             for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
428             barray += bs;
429           }
430           barray -= bs2;
431         }
432 
433         if (in[j] >= cstart && in[j] < cend) {
434           col = in[j] - cstart;
435           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
436         } else if (in[j] < 0) {
437           continue;
438         } else {
439           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
440           if (mat->was_assembled) {
441             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
442 
443 #if defined(PETSC_USE_DEBUG)
444   #if defined(PETSC_USE_CTABLE)
445             {
446               PetscInt data;
447               PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
448               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
449             }
450   #else
451             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
452   #endif
453 #endif
454 #if defined(PETSC_USE_CTABLE)
455             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
456             col = (col - 1) / bs;
457 #else
458             col = (baij->colmap[in[j]] - 1) / bs;
459 #endif
460             if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
461               PetscCall(MatDisAssemble_MPIBAIJ(mat));
462               col = in[j];
463             } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
464           } else col = in[j];
465           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
466         }
467       }
468     } else {
469       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
470       if (!baij->donotstash) {
471         if (roworiented) {
472           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
473         } else {
474           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
475         }
476       }
477     }
478   }
479   PetscFunctionReturn(PETSC_SUCCESS);
480 }
481 
/* Multiplicative hashing: HASH_KEY = 0.6180339887 (fractional part of the golden
   ratio); HASH maps key to floor(size * frac(key * HASH_KEY)), i.e. an index in
   [0, size).  tmp must be a PetscReal lvalue used as scratch. */
#define HASH_KEY             0.6180339887
#define HASH(size, key, tmp) (tmp = (key)*HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/*
   MatSetValues_MPIBAIJ_HT - Scalar-indexed insertion using the precomputed
   hash table (baij->ht maps block keys to slots; baij->hd holds the matching
   value-block pointers).  Only existing nonzero blocks can be addressed: a
   failed lookup is an error.  Collisions are resolved by linear probing with
   wrap-around.  Off-process rows are stashed as in MatSetValues_MPIBAIJ.
   In debug builds, probe counts are accumulated for hash-efficiency stats.
*/
static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
  PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
  PetscReal    tmp;
  MatScalar  **HD       = baij->hd, value;
  PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j = 0; j < n; j++) {
        col = in[j];
        if (roworiented) value = v[i * n + j];
        else value = v[i + j * m];
        /* Look up into the hash table: key encodes the (block row, block column) pair */
        key = (row / bs) * Nbs + (col / bs) + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          /* same probing as below, but counting probes for efficiency statistics */
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
              ;
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
                ;
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          /* linear probe forward from h1, then wrap to the start of the table */
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
            ;
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
              ;
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        /* A HASH table entry is found, so insert the values at the correct address */
        /* blocks are stored column-major: offset (col%bs)*bs + row%bs within the block */
        if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
        else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
      }
    } else if (!baij->donotstash) {
      /* off-process row: stash for the assembly communication phase */
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
554 
/*
   MatSetValuesBlocked_MPIBAIJ_HT - Block-indexed insertion through the hash
   table: im[]/in[] are global BLOCK indices, and each v entry is a whole
   bs x bs block.  Lookup/probing mirrors MatSetValues_MPIBAIJ_HT, but the key
   is the block pair itself and the located block is filled with the strided
   input, transposing when the input is row-oriented (blocks are stored
   column-major).  Only pre-existing blocks can be addressed.
*/
static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col;
  PetscInt           rstart = baij->rstartbs;
  PetscInt           rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2; /* NOTE(review): rstart/row are block indices but rend is mat->rmap->rend (point units) — looks like it should be baij->rendbs as in MatSetValuesBlocked_MPIBAIJ; confirm */
  PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
  PetscReal          tmp;
  MatScalar        **HD = baij->hd, *baij_a;
  const PetscScalar *v_t, *value;
  PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  /* distance (in scalars) between consecutive rows/columns of one block inside v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
      PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    }
    row = im[i];
    v_t = v + i * nbs2; /* start of block-row i within the row-oriented input */
    if (row >= rstart && row < rend) {
      for (j = 0; j < n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row * Nbs + col + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          /* same probing as below, but counting probes for efficiency statistics */
          total_ct++;
          insert_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
              ;
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
                ;
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          /* linear probe forward from h1, then wrap to the start of the table */
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
            ;
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
              ;
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        baij_a = HD[idx]; /* start of the stored bs x bs block */
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t += bs;
          /* input is row-major: scatter into the column-major block (stride bs) */
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
            }
          }
        } else {
          value = v + j * (stepval + bs) * bs + i * bs;
          /* input is column-major: copy column by column */
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
            }
          }
        }
      }
    } else {
      /* off-process block row: stash for the assembly communication phase */
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
653 
/*
   MatGetValues_MPIBAIJ - Retrieve an m x n patch of values addressed by
   global point indices into the row-major array v.  Only locally owned rows
   are supported; negative indices skip the corresponding entry.  Columns in
   the diagonal range read from A; others read from B through the colmap
   translation, returning 0.0 for positions not stored in B.
*/
static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          /* diagonal-block column: read from A in local numbering */
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          /* off-diagonal column: translate the global block column through colmap */
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
          data--;
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          /* data < 0: block column absent; garray check guards against stale colmap slots */
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
691 
/* Computes a norm of the parallel BAIJ matrix from the two local sequential blocks
   (A = diagonal part, B = off-diagonal part), reducing across the communicator.
   Supports NORM_FROBENIUS, NORM_1 (max column sum) and NORM_INFINITY (max row sum). */
static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
  PetscReal    sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    /* single process: the diagonal block A holds the entire matrix */
    PetscCall(MatNorm(baij->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over all local scalar entries of both blocks, then reduce and take sqrt */
      v  = amat->a;
      nz = amat->nz * bs2; /* amat->nz counts blocks; bs2 scalars per block */
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v  = bmat->a;
      nz = bmat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp, *tmp2;
      /* NOTE(review): cstart is taken from rstartbs (row block start), not cstartbs --
         correct only when the row and column block starts coincide; verify for
         non-square layouts */
      PetscInt *jj, *garray = baij->garray, cstart = baij->rstartbs;
      /* tmp accumulates this process's contribution to every global column sum */
      PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N, &tmp2));
      v  = amat->a;
      jj = amat->j;
      for (i = 0; i < amat->nz; i++) {
        /* walk each bs x bs block; the inner 'row' loop advances v through one block column */
        for (j = 0; j < bs; j++) {
          col = bs * (cstart + *jj) + j; /* column index */
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      v  = bmat->a;
      jj = bmat->j;
      for (i = 0; i < bmat->nz; i++) {
        for (j = 0; j < bs; j++) {
          /* garray maps the off-diagonal block's compressed column index to the global block column */
          col = bs * garray[*jj] + j;
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      /* combine per-process column sums, then take the maximum over all columns */
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = 0.0;
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      PetscCall(PetscMalloc1(bs, &sums));
      sum = 0.0;
      /* each block row contributes bs point rows; sums[] tracks one block row at a time */
      for (j = 0; j < amat->mbs; j++) {
        for (row = 0; row < bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2 * amat->i[j];
        nz = amat->i[j + 1] - amat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        v  = bmat->a + bs2 * bmat->i[j];
        nz = bmat->i[j + 1] - bmat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        for (row = 0; row < bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      /* rows are not split across processes, so a MAX reduction finishes the job */
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscFree(sums));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
791 
/*
  Creates the hash table, and sets the table entries.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
799 static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
800 {
801   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
802   Mat          A = baij->A, B = baij->B;
803   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
804   PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
805   PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
806   PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
807   PetscInt    *HT, key;
808   MatScalar  **HD;
809   PetscReal    tmp;
810 #if defined(PETSC_USE_INFO)
811   PetscInt ct = 0, max = 0;
812 #endif
813 
814   PetscFunctionBegin;
815   if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS);
816 
817   baij->ht_size = (PetscInt)(factor * nz);
818   ht_size       = baij->ht_size;
819 
820   /* Allocate Memory for Hash Table */
821   PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
822   HD = baij->hd;
823   HT = baij->ht;
824 
825   /* Loop Over A */
826   for (i = 0; i < a->mbs; i++) {
827     for (j = ai[i]; j < ai[i + 1]; j++) {
828       row = i + rstart;
829       col = aj[j] + cstart;
830 
831       key = row * Nbs + col + 1;
832       h1  = HASH(ht_size, key, tmp);
833       for (k = 0; k < ht_size; k++) {
834         if (!HT[(h1 + k) % ht_size]) {
835           HT[(h1 + k) % ht_size] = key;
836           HD[(h1 + k) % ht_size] = a->a + j * bs2;
837           break;
838 #if defined(PETSC_USE_INFO)
839         } else {
840           ct++;
841 #endif
842         }
843       }
844 #if defined(PETSC_USE_INFO)
845       if (k > max) max = k;
846 #endif
847     }
848   }
849   /* Loop Over B */
850   for (i = 0; i < b->mbs; i++) {
851     for (j = bi[i]; j < bi[i + 1]; j++) {
852       row = i + rstart;
853       col = garray[bj[j]];
854       key = row * Nbs + col + 1;
855       h1  = HASH(ht_size, key, tmp);
856       for (k = 0; k < ht_size; k++) {
857         if (!HT[(h1 + k) % ht_size]) {
858           HT[(h1 + k) % ht_size] = key;
859           HD[(h1 + k) % ht_size] = b->a + j * bs2;
860           break;
861 #if defined(PETSC_USE_INFO)
862         } else {
863           ct++;
864 #endif
865         }
866       }
867 #if defined(PETSC_USE_INFO)
868       if (k > max) max = k;
869 #endif
870     }
871   }
872 
873   /* Print Summary */
874 #if defined(PETSC_USE_INFO)
875   for (i = 0, j = 0; i < ht_size; i++) {
876     if (HT[i]) j++;
877   }
878   PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max));
879 #endif
880   PetscFunctionReturn(PETSC_SUCCESS);
881 }
882 
883 static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
884 {
885   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
886   PetscInt     nstash, reallocs;
887 
888   PetscFunctionBegin;
889   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
890 
891   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
892   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
893   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
894   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
895   PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
896   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
897   PetscFunctionReturn(PETSC_SUCCESS);
898 }
899 
/* Completes assembly: drains the point and block stashes (entries destined for this
   process from other ranks), assembles the two sequential blocks, handles collective
   disassembly/reassembly, and optionally builds the hash table for fast MatSetValues. */
static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
  PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt    *row, *col;
  PetscBool    r1, r2, r3, other_disassembled;
  MatScalar   *val;
  PetscMPIInt  n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* drain the point stash: each message carries (row, col, val) triples */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ *)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* each stashed block holds bs2 scalars, hence the val + i * bs2 offset */
        PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    /* NOTE(review): divides by ht_insert_ct without a zero guard -- confirm at least
       one insertion is guaranteed whenever the hash table exists */
    PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* lazily build the hash table on first final assembly when MAT_USE_HASH_TABLE was set */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* work space used by MatGetRow_MPIBAIJ is invalidated by assembly */
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1008 
1009 extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer);
1010 #include <petscdraw.h>
/* Views the parallel matrix via ASCII, draw or socket viewers. INFO formats print
   per-rank statistics; otherwise the whole matrix is gathered onto rank 0 (as a
   temporary MPIBAIJ with all rows on rank 0) and viewed there sequentially. */
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ      *baij = (Mat_MPIBAIJ *)mat->data;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank statistics only; no matrix entries are printed */
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, (double)info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqBAIJ *Aloc;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      /* rank 0 owns every row of the gathered copy */
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      /* rvals holds the bs consecutive global point-row indices of block row i */
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        /* insert the block one point column at a time; a advances through the block */
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        /* garray translates B's compressed column index to the global block column */
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    /* matname is read only under the same name-exists condition that sets it */
    if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)(A->data))->A, matname));
      PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1131 
1132 /* Used for both MPIBAIJ and MPISBAIJ matrices */
/* Writes the matrix to a binary viewer in the PETSc binary (point-wise CSR) format:
   header, per-row lengths, column indices, then values.  Columns of each point row
   are emitted in ascending global order: B columns left of the diagonal block, the
   A (diagonal-block) columns, then the remaining B columns. */
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
  Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt        header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l;
  PetscInt64      nz, hnz;
  PetscInt       *rowlens, *colidxs;
  PetscScalar    *matvals;
  PetscMPIInt     rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs * bs * (A->nz + B->nz); /* local point (not block) nonzero count */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  /* NOTE(review): header[3] is set only on rank 0 and left uninitialized elsewhere;
     presumably PetscViewerBinaryWrite writes the header from rank 0 only -- confirm */
  if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  /* every point row inside block row i has the same length: bs scalars per block */
  for (cnt = 0, i = 0; i < A->mbs; i++)
    for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) { /* k = point row within block row i */
      /* B columns strictly left of the diagonal block (global block col <= cs/bs) */
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
      }
      /* then the diagonal-block (A) columns, shifted by the column start cs */
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
      /* finally the remaining B columns right of the diagonal block */
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values, traversing in the same order as the indices;
     element (k,l) of block j lives at a[bs*(bs*j + l) + k], i.e. blocks are stored column-major */
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
    }
  }
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1211 
1212 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
1213 {
1214   PetscBool iascii, isdraw, issocket, isbinary;
1215 
1216   PetscFunctionBegin;
1217   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1218   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1219   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1220   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1221   if (iascii || isdraw || issocket) {
1222     PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
1223   } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
1224   PetscFunctionReturn(PETSC_SUCCESS);
1225 }
1226 
1227 static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
1228 {
1229   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1230   PetscInt     nt;
1231 
1232   PetscFunctionBegin;
1233   PetscCall(VecGetLocalSize(xx, &nt));
1234   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
1235   PetscCall(VecGetLocalSize(yy, &nt));
1236   PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy");
1237   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1238   PetscCall((*a->A->ops->mult)(a->A, xx, yy));
1239   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1240   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
1241   PetscFunctionReturn(PETSC_SUCCESS);
1242 }
1243 
1244 static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1245 {
1246   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1247 
1248   PetscFunctionBegin;
1249   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1250   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1251   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1252   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1253   PetscFunctionReturn(PETSC_SUCCESS);
1254 }
1255 
1256 static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
1257 {
1258   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1259 
1260   PetscFunctionBegin;
1261   /* do nondiagonal part */
1262   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1263   /* do local part */
1264   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1265   /* add partial results together */
1266   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1267   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1268   PetscFunctionReturn(PETSC_SUCCESS);
1269 }
1270 
1271 static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1272 {
1273   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1274 
1275   PetscFunctionBegin;
1276   /* do nondiagonal part */
1277   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1278   /* do local part */
1279   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1280   /* add partial results together */
1281   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1282   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1283   PetscFunctionReturn(PETSC_SUCCESS);
1284 }
1285 
1286 /*
1287   This only works correctly for square matrices where the subblock A->A is the
1288    diagonal block
1289 */
1290 static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
1291 {
1292   PetscFunctionBegin;
1293   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1294   PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
1295   PetscFunctionReturn(PETSC_SUCCESS);
1296 }
1297 
1298 static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
1299 {
1300   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1301 
1302   PetscFunctionBegin;
1303   PetscCall(MatScale(a->A, aa));
1304   PetscCall(MatScale(a->B, aa));
1305   PetscFunctionReturn(PETSC_SUCCESS);
1306 }
1307 
/* Returns one locally-owned point row of the parallel matrix with global column
   indices in increasing order, merging the rows of the A (diagonal) and B
   (off-diagonal) blocks.  Must be paired with MatRestoreRow_MPIBAIJ. */
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;

  PetscFunctionBegin;
  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt     max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      /* combined block count of block row i over both parts */
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }
  lrow = row - brstart; /* local point-row index */

  /* only request from the sequential blocks what the caller asked for */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1; /* split point: first B entry right of the diagonal block */
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries whose global block column precedes the diagonal block come first */
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying values */
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          /* values were not requested, so locate the split point now */
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1390 
1391 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1392 {
1393   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1394 
1395   PetscFunctionBegin;
1396   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
1397   baij->getrowactive = PETSC_FALSE;
1398   PetscFunctionReturn(PETSC_SUCCESS);
1399 }
1400 
1401 static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1402 {
1403   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1404 
1405   PetscFunctionBegin;
1406   PetscCall(MatZeroEntries(l->A));
1407   PetscCall(MatZeroEntries(l->B));
1408   PetscFunctionReturn(PETSC_SUCCESS);
1409 }
1410 
1411 static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1412 {
1413   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
1414   Mat            A = a->A, B = a->B;
1415   PetscLogDouble isend[5], irecv[5];
1416 
1417   PetscFunctionBegin;
1418   info->block_size = (PetscReal)matin->rmap->bs;
1419 
1420   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1421 
1422   isend[0] = info->nz_used;
1423   isend[1] = info->nz_allocated;
1424   isend[2] = info->nz_unneeded;
1425   isend[3] = info->memory;
1426   isend[4] = info->mallocs;
1427 
1428   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1429 
1430   isend[0] += info->nz_used;
1431   isend[1] += info->nz_allocated;
1432   isend[2] += info->nz_unneeded;
1433   isend[3] += info->memory;
1434   isend[4] += info->mallocs;
1435 
1436   if (flag == MAT_LOCAL) {
1437     info->nz_used      = isend[0];
1438     info->nz_allocated = isend[1];
1439     info->nz_unneeded  = isend[2];
1440     info->memory       = isend[3];
1441     info->mallocs      = isend[4];
1442   } else if (flag == MAT_GLOBAL_MAX) {
1443     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1444 
1445     info->nz_used      = irecv[0];
1446     info->nz_allocated = irecv[1];
1447     info->nz_unneeded  = irecv[2];
1448     info->memory       = irecv[3];
1449     info->mallocs      = irecv[4];
1450   } else if (flag == MAT_GLOBAL_SUM) {
1451     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1452 
1453     info->nz_used      = irecv[0];
1454     info->nz_allocated = irecv[1];
1455     info->nz_unneeded  = irecv[2];
1456     info->memory       = irecv[3];
1457     info->mallocs      = irecv[4];
1458   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1459   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1460   info->fill_ratio_needed = 0;
1461   info->factor_mallocs    = 0;
1462   PetscFunctionReturn(PETSC_SUCCESS);
1463 }
1464 
/* Applies a matrix option to the parallel matrix, forwarding to the sequential
   blocks and/or recording state on the parallel object as appropriate. */
static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* options forwarded verbatim to both sequential blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    /* also cached on the parallel object; this flag is saved/restored during assembly */
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* when set, off-process entries are dropped instead of stashed */
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    a->ht_fact = 1.39; /* default hash-table size factor used when the table is built */
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1512 
/* Transpose an MPIBAIJ matrix.

   reuse == MAT_INITIAL_MATRIX: creates a new matrix (preallocation unknown, so
   only the block size is set and insertion may malloc);
   reuse == MAT_REUSE_MATRIX:   fills *matout from a prior MAT_INITIAL_MATRIX
   call after checking its nonzero state still matches A;
   reuse == MAT_INPLACE_MATRIX: replaces A by its transpose via MatHeaderMerge(). */
static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *Aloc;
  Mat          B;
  PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
  PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
  MatScalar   *a;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    /* Do not know preallocation information, but must set block size */
    PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ *)baij->A->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  PetscCall(PetscMalloc1(bs, &rvals));

  for (i = 0; i < mbs; i++) {
    /* rvals[] = global point-row indices covered by block row i of A;
       these become column indices of the transpose */
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      col = (baij->cstartbs + aj[j]) * bs;
      for (k = 0; k < bs; k++) {
        /* insert bs contiguous stored values as one row 'col' of the transpose */
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));

        col++;
        a += bs;
      }
    }
  }
  /* copy over the B part; garray maps compressed block-column indices to global ones */
  Aloc = (Mat_SeqBAIJ *)baij->B->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      col = baij->garray[aj[j]] * bs;
      for (k = 0; k < bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
        col++;
        a += bs;
      }
    }
  }
  PetscCall(PetscFree(rvals));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else PetscCall(MatHeaderMerge(A, &B));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1579 
/* Scale mat from the left by diag(ll) and/or from the right by diag(rr); either
   vector may be NULL to skip that side. Right-scaling the off-diagonal block
   needs the ghost values of rr, so the scatter is started early and completed
   only after the other (communication-free) scalings. */
static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          a = baij->A, b = baij->B;
  PetscInt     s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    /* left-scale the off-diagonal block; its rows are local so no ghost values are needed */
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale  the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1609 
/* Zero the rows listed in rows[] (global indices; each process may pass any
   rows) and set their diagonal entries to 'diag'. When x and b are both given,
   also fixes the right-hand side: b[row] = diag * x[row] for each local zeroed
   row. */
static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
  PetscInt    *lrows;
  PetscInt     r, len;
  PetscBool    cong;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  if ((diag != 0.0) && cong) {
    /* congruent row/column layout: the sequential routine can place the diagonal directly */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
  } else if (diag != 0.0) {
    /* non-congruent layout: zero rows first, then insert diagonal entries one
       by one, which requires that new nonzero locations be allowed */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
    PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
       MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  } else {
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern || !((Mat_SeqBAIJ *)(l->A->data))->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1667 
/* Zero both the rows and the columns listed in rows[] (global point indices)
   and set diag on the diagonal of the zeroed rows. With x and b given, the
   right-hand side is adjusted so the solution keeps the values of x in the
   eliminated unknowns. The diagonal block is handled by the sequential
   routine; the off-diagonal block is edited in place via its raw BAIJ arrays. */
static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n, p = 0;
  PetscInt           i, j, k, r, len = 0, row, col, count;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask;
  Vec                xmask, lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1; /* -1 == "row not selected" */
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off-diagonal part of matrix */
  /* build lmask: nonzero in the ghost positions corresponding to zeroed columns */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x) {
    /* ghost values of x are needed to update b below */
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off-diagonal matrix */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; /* point entries in this point row */
    aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa += bs; /* stride bs walks along one point row within consecutive blocks */
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i / bs;
    for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs * baij->j[j] + k;
        if (PetscAbsScalar(mask[col])) {
          aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k;
          if (x) bb[i] -= aa[0] * xx[col]; /* move known value to the rhs before zeroing */
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)(l->A->data))->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1763 
1764 static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1765 {
1766   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1767 
1768   PetscFunctionBegin;
1769   PetscCall(MatSetUnfactored(a->A));
1770   PetscFunctionReturn(PETSC_SUCCESS);
1771 }
1772 
1773 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);
1774 
1775 static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag)
1776 {
1777   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
1778   Mat          a, b, c, d;
1779   PetscBool    flg;
1780 
1781   PetscFunctionBegin;
1782   a = matA->A;
1783   b = matA->B;
1784   c = matB->A;
1785   d = matB->B;
1786 
1787   PetscCall(MatEqual(a, c, &flg));
1788   if (flg) PetscCall(MatEqual(b, d, &flg));
1789   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1790   PetscFunctionReturn(PETSC_SUCCESS);
1791 }
1792 
1793 static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str)
1794 {
1795   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1796   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
1797 
1798   PetscFunctionBegin;
1799   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1800   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1801     PetscCall(MatCopy_Basic(A, B, str));
1802   } else {
1803     PetscCall(MatCopy(a->A, b->A, str));
1804     PetscCall(MatCopy(a->B, b->B, str));
1805   }
1806   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1807   PetscFunctionReturn(PETSC_SUCCESS);
1808 }
1809 
1810 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
1811 {
1812   PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs;
1813   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
1814   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
1815 
1816   PetscFunctionBegin;
1817   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
1818   PetscFunctionReturn(PETSC_SUCCESS);
1819 }
1820 
/* Compute Y = a*X + Y. With SAME_NONZERO_PATTERN this is a raw BLAS axpy on
   the stored values of both local blocks; with SUBSET_NONZERO_PATTERN the
   generic routine is used; otherwise a new matrix preallocated for the merged
   pattern is built and header-merged back into Y. */
static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
  PetscBLASInt bnz, one                         = 1;
  Mat_SeqBAIJ *x, *y;
  PetscInt     bs2 = Y->rmap->bs * Y->rmap->bs;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* identical patterns: add the value arrays of the diagonal blocks ... */
    x                 = (Mat_SeqBAIJ *)xx->A->data;
    y                 = (Mat_SeqBAIJ *)yy->A->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    /* ... and of the off-diagonal blocks */
    x = (Mat_SeqBAIJ *)xx->B->data;
    y = (Mat_SeqBAIJ *)yy->B->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
    /* build a matrix B preallocated for the union of the patterns, compute
       B = a*X + Y there, then replace Y's guts by B's */
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
    PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
    PetscCall(MatSetType(B, MATMPIBAIJ));
    PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
    /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1863 
1864 static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
1865 {
1866   PetscFunctionBegin;
1867   if (PetscDefined(USE_COMPLEX)) {
1868     Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;
1869 
1870     PetscCall(MatConjugate_SeqBAIJ(a->A));
1871     PetscCall(MatConjugate_SeqBAIJ(a->B));
1872   }
1873   PetscFunctionReturn(PETSC_SUCCESS);
1874 }
1875 
1876 static PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1877 {
1878   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1879 
1880   PetscFunctionBegin;
1881   PetscCall(MatRealPart(a->A));
1882   PetscCall(MatRealPart(a->B));
1883   PetscFunctionReturn(PETSC_SUCCESS);
1884 }
1885 
1886 static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1887 {
1888   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1889 
1890   PetscFunctionBegin;
1891   PetscCall(MatImaginaryPart(a->A));
1892   PetscCall(MatImaginaryPart(a->B));
1893   PetscFunctionReturn(PETSC_SUCCESS);
1894 }
1895 
/* Extract the parallel submatrix mat[isrow, iscol]. The column IS is gathered
   onto every process (all ranks need the full column selection); the gathered
   IS is composed on the new matrix so a later MAT_REUSE_MATRIX call can
   retrieve it instead of gathering again. */
static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS       iscol_local;
  PetscInt csize;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(iscol, &csize));
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    PetscCall(ISAllGather(iscol, &iscol_local));
  }
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE));
  if (call == MAT_INITIAL_MATRIX) {
    /* the compose takes a reference, so drop ours; the IS lives with *newmat */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1916 
1917 /*
1918   Not great since it makes two copies of the submatrix, first an SeqBAIJ
1919   in local and then by concatenating the local matrices the end result.
1920   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1921   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
1922 */
/* Worker for MatCreateSubMatrix_MPIBAIJ()/MatPermute_MPIBAIJ(): builds the
   parallel submatrix mat[isrow, iscol] (iscol already gathered on all ranks).
   csize is the desired local column size of the result (PETSC_DECIDE allowed);
   sym selects the SBAIJ-compatible path. The local submatrix Mreuse is stashed
   on the result so MAT_REUSE_MATRIX can refill it. */
PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym)
{
  PetscMPIInt  rank, size;
  PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
  PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat          M, Mreuse;
  MatScalar   *vwork, *aa;
  MPI_Comm     comm;
  IS           isrow_new, iscol_new;
  Mat_SeqBAIJ *aij;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* The compression and expansion should be avoided. Doesn't point
     out errors, might change the indices, hence buggy */
  /* convert point index sets to block index sets */
  PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
  if (isrow == iscol) {
    iscol_new = isrow_new;
    PetscCall(PetscObjectReference((PetscObject)iscol_new));
  } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));

  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym));
  } else {
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym));
  }
  PetscCall(ISDestroy(&isrow_new));
  PetscCall(ISDestroy(&iscol_new));
  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetSize(Mreuse, &m, &n));
  m = m / bs; /* sizes below are in blocks */
  n = n / bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n * bs) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize / bs;
    }
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
    PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ));
    /* only one of these preallocations takes effect, depending on the type of M */
    PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(PetscFree2(dlens, olens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqBAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  /* insert the local submatrix block-row by block-row */
  for (i = 0; i < m; i++) {
    row   = rstart / bs + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj    = PetscSafePointerPlusOffset(jj, nz);
    vwork = aa;
    aa    = PetscSafePointerPlusOffset(aa, nz * bs * bs);
    PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(PetscObjectDereference((PetscObject)Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2051 
/* Permute A by row permutation rowp and column permutation colp, producing *B.
   Implemented as a submatrix extraction with the permuted index sets, after
   making rowp collective on A's communicator and colp fully local. */
static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  MPI_Comm        comm, pcomm;
  PetscInt        clocal_size, nrows;
  const PetscInt *rows;
  PetscMPIInt     size;
  IS              crowp, lcolp;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  /* make a collective version of 'rowp' */
  PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
  if (pcomm == comm) {
    crowp = rowp;
  } else {
    /* rowp lives on a different communicator: copy its indices onto A's comm */
    PetscCall(ISGetSize(rowp, &nrows));
    PetscCall(ISGetIndices(rowp, &rows));
    PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
    PetscCall(ISRestoreIndices(rowp, &rows));
  }
  PetscCall(ISSetPermutation(crowp));
  /* make a local version of 'colp' */
  PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
  PetscCallMPI(MPI_Comm_size(pcomm, &size));
  if (size == 1) {
    lcolp = colp;
  } else {
    PetscCall(ISAllGather(colp, &lcolp));
  }
  PetscCall(ISSetPermutation(lcolp));
  /* now we just get the submatrix */
  PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE));
  /* clean up: destroy only the index sets created above, not the caller's */
  if (pcomm != comm) PetscCall(ISDestroy(&crowp));
  if (size > 1) PetscCall(ISDestroy(&lcolp));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2090 
2091 static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
2092 {
2093   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
2094   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
2095 
2096   PetscFunctionBegin;
2097   if (nghosts) *nghosts = B->nbs;
2098   if (ghosts) *ghosts = baij->garray;
2099   PetscFunctionReturn(PETSC_SUCCESS);
2100 }
2101 
/* Build, redundantly on every process, a sequential AIJ matrix with one
   row/column per BLOCK of A that holds the nonzero block structure of the
   entire parallel matrix. Numerical values are never set; only the pattern is
   assembled (column indices are gathered directly into the new matrix's j
   array with in-place Allgatherv). */
static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat)
{
  Mat          B;
  Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
  Mat_SeqAIJ  *b;
  PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
  PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
  PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));

  /*   Tell every processor the number of nonzeros per row  */
  PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
  for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
  PetscCall(PetscMalloc1(2 * size, &recvcounts));
  displs = recvcounts + size; /* second half of the allocation holds the displacements */
  for (i = 0; i < size; i++) {
    recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs;
    displs[i]     = A->rmap->range[i] / bs;
  }
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /* Create the sequential matrix of the same type as the local block diagonal  */
  PetscCall(MatCreate(PETSC_COMM_SELF, &B));
  PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(B, MATSEQAIJ));
  PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
  b = (Mat_SeqAIJ *)B->data;

  /*     Copy my part of matrix column indices over  */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank] / bs]; /* write my rows directly into B's j array */
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend / bs - A->rmap->rstart / bs;
  cnt        = 0;
  /* merge the off-diagonal (bd) and diagonal (ad) column indices of each block
     row into globally increasing order */
  for (i = 0; i < n; i++) {
    /* put in lower diagonal portion */
    m = bd->i[i + 1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;

    /* put in upper diagonal portion */
    while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
  }
  PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);

  /*  Gather all column indices to all processors  */
  for (i = 0; i < size; i++) {
    recvcounts[i] = 0;
    for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
  }
  displs[0] = 0;
  for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /*  Assemble the matrix into usable form (note numerical values not yet set)  */
  /* set the b->ilen (length of each row) values */
  PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
  /* set the b->i indices */
  b->i[0] = 0;
  for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
  PetscCall(PetscFree(lens));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscFree(recvcounts));

  PetscCall(MatPropagateSymmetryOptions(A, B));
  *newmat = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2181 
2182 static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
2183 {
2184   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
2185   Vec          bb1 = NULL;
2186 
2187   PetscFunctionBegin;
2188   if (flag == SOR_APPLY_UPPER) {
2189     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2190     PetscFunctionReturn(PETSC_SUCCESS);
2191   }
2192 
2193   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));
2194 
2195   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2196     if (flag & SOR_ZERO_INITIAL_GUESS) {
2197       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2198       its--;
2199     }
2200 
2201     while (its--) {
2202       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2203       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2204 
2205       /* update rhs: bb1 = bb - B*x */
2206       PetscCall(VecScale(mat->lvec, -1.0));
2207       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2208 
2209       /* local sweep */
2210       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
2211     }
2212   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2213     if (flag & SOR_ZERO_INITIAL_GUESS) {
2214       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2215       its--;
2216     }
2217     while (its--) {
2218       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2219       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2220 
2221       /* update rhs: bb1 = bb - B*x */
2222       PetscCall(VecScale(mat->lvec, -1.0));
2223       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2224 
2225       /* local sweep */
2226       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
2227     }
2228   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2229     if (flag & SOR_ZERO_INITIAL_GUESS) {
2230       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2231       its--;
2232     }
2233     while (its--) {
2234       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2235       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2236 
2237       /* update rhs: bb1 = bb - B*x */
2238       PetscCall(VecScale(mat->lvec, -1.0));
2239       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2240 
2241       /* local sweep */
2242       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
2243     }
2244   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported");
2245 
2246   PetscCall(VecDestroy(&bb1));
2247   PetscFunctionReturn(PETSC_SUCCESS);
2248 }
2249 
2250 static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions)
2251 {
2252   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data;
2253   PetscInt     m, N, i, *garray = aij->garray;
2254   PetscInt     ib, jb, bs = A->rmap->bs;
2255   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data;
2256   MatScalar   *a_val = a_aij->a;
2257   Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data;
2258   MatScalar   *b_val = b_aij->a;
2259   PetscReal   *work;
2260 
2261   PetscFunctionBegin;
2262   PetscCall(MatGetSize(A, &m, &N));
2263   PetscCall(PetscCalloc1(N, &work));
2264   if (type == NORM_2) {
2265     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2266       for (jb = 0; jb < bs; jb++) {
2267         for (ib = 0; ib < bs; ib++) {
2268           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2269           a_val++;
2270         }
2271       }
2272     }
2273     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2274       for (jb = 0; jb < bs; jb++) {
2275         for (ib = 0; ib < bs; ib++) {
2276           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2277           b_val++;
2278         }
2279       }
2280     }
2281   } else if (type == NORM_1) {
2282     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2283       for (jb = 0; jb < bs; jb++) {
2284         for (ib = 0; ib < bs; ib++) {
2285           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2286           a_val++;
2287         }
2288       }
2289     }
2290     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2291       for (jb = 0; jb < bs; jb++) {
2292         for (ib = 0; ib < bs; ib++) {
2293           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2294           b_val++;
2295         }
2296       }
2297     }
2298   } else if (type == NORM_INFINITY) {
2299     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2300       for (jb = 0; jb < bs; jb++) {
2301         for (ib = 0; ib < bs; ib++) {
2302           int col   = A->cmap->rstart + a_aij->j[i] * bs + jb;
2303           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2304           a_val++;
2305         }
2306       }
2307     }
2308     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2309       for (jb = 0; jb < bs; jb++) {
2310         for (ib = 0; ib < bs; ib++) {
2311           int col   = garray[b_aij->j[i]] * bs + jb;
2312           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2313           b_val++;
2314         }
2315       }
2316     }
2317   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
2318     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2319       for (jb = 0; jb < bs; jb++) {
2320         for (ib = 0; ib < bs; ib++) {
2321           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
2322           a_val++;
2323         }
2324       }
2325     }
2326     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2327       for (jb = 0; jb < bs; jb++) {
2328         for (ib = 0; ib < bs; ib++) {
2329           work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
2330           b_val++;
2331         }
2332       }
2333     }
2334   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2335     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2336       for (jb = 0; jb < bs; jb++) {
2337         for (ib = 0; ib < bs; ib++) {
2338           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
2339           a_val++;
2340         }
2341       }
2342     }
2343     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2344       for (jb = 0; jb < bs; jb++) {
2345         for (ib = 0; ib < bs; ib++) {
2346           work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
2347           b_val++;
2348         }
2349       }
2350     }
2351   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
2352   if (type == NORM_INFINITY) {
2353     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
2354   } else {
2355     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
2356   }
2357   PetscCall(PetscFree(work));
2358   if (type == NORM_2) {
2359     for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2360   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2361     for (i = 0; i < N; i++) reductions[i] /= m;
2362   }
2363   PetscFunctionReturn(PETSC_SUCCESS);
2364 }
2365 
2366 static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values)
2367 {
2368   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2369 
2370   PetscFunctionBegin;
2371   PetscCall(MatInvertBlockDiagonal(a->A, values));
2372   A->factorerrortype             = a->A->factorerrortype;
2373   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2374   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2375   PetscFunctionReturn(PETSC_SUCCESS);
2376 }
2377 
2378 static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a)
2379 {
2380   Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
2381   Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ *)maij->A->data;
2382 
2383   PetscFunctionBegin;
2384   if (!Y->preallocated) {
2385     PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
2386   } else if (!aij->nz) {
2387     PetscInt nonew = aij->nonew;
2388     PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
2389     aij->nonew = nonew;
2390   }
2391   PetscCall(MatShift_Basic(Y, a));
2392   PetscFunctionReturn(PETSC_SUCCESS);
2393 }
2394 
2395 static PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d)
2396 {
2397   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2398 
2399   PetscFunctionBegin;
2400   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2401   PetscCall(MatMissingDiagonal(a->A, missing, d));
2402   if (d) {
2403     PetscInt rstart;
2404     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2405     *d += rstart / A->rmap->bs;
2406   }
2407   PetscFunctionReturn(PETSC_SUCCESS);
2408 }
2409 
2410 static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a)
2411 {
2412   PetscFunctionBegin;
2413   *a = ((Mat_MPIBAIJ *)A->data)->A;
2414   PetscFunctionReturn(PETSC_SUCCESS);
2415 }
2416 
2417 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep)
2418 {
2419   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2420 
2421   PetscFunctionBegin;
2422   PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2423   PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2424   PetscFunctionReturn(PETSC_SUCCESS);
2425 }
2426 
/* Operations table for MATMPIBAIJ.  Entries are positional, matching struct _MatOps;
   the numeric comments mark every fifth slot index.  Do not reorder. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
                                       MatGetRow_MPIBAIJ,
                                       MatRestoreRow_MPIBAIJ,
                                       MatMult_MPIBAIJ,
                                       /* 4*/ MatMultAdd_MPIBAIJ,
                                       MatMultTranspose_MPIBAIJ,
                                       MatMultTransposeAdd_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIBAIJ,
                                       MatTranspose_MPIBAIJ,
                                       /*15*/ MatGetInfo_MPIBAIJ,
                                       MatEqual_MPIBAIJ,
                                       MatGetDiagonal_MPIBAIJ,
                                       MatDiagonalScale_MPIBAIJ,
                                       MatNorm_MPIBAIJ,
                                       /*20*/ MatAssemblyBegin_MPIBAIJ,
                                       MatAssemblyEnd_MPIBAIJ,
                                       MatSetOption_MPIBAIJ,
                                       MatZeroEntries_MPIBAIJ,
                                       /*24*/ MatZeroRows_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIBAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIBAIJ,
                                       MatCreateSubMatrices_MPIBAIJ,
                                       MatIncreaseOverlap_MPIBAIJ,
                                       MatGetValues_MPIBAIJ,
                                       MatCopy_MPIBAIJ,
                                       /*44*/ NULL,
                                       MatScale_MPIBAIJ,
                                       MatShift_MPIBAIJ,
                                       NULL,
                                       MatZeroRowsColumns_MPIBAIJ,
                                       /*49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIBAIJ,
                                       MatPermute_MPIBAIJ,
                                       MatSetValuesBlocked_MPIBAIJ,
                                       /*59*/ MatCreateSubMatrix_MPIBAIJ,
                                       MatDestroy_MPIBAIJ,
                                       MatView_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*74*/ NULL,
                                       MatFDColoringApply_BAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPIBAIJ,
                                       /*84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*99*/ NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIBAIJ,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIBAIJ,
                                       MatImaginaryPart_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMissingDiagonal_MPIBAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ,
                                       NULL,
                                       MatGetGhosts_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIBAIJ,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIBAIJ,
                                       MatInvertBlockDiagonal_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_Default,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       NULL,
                                       /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIBAIJ,
                                       NULL};
2580 
2581 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
2582 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
2583 
/*
  MatMPIBAIJSetPreallocationCSR_MPIBAIJ - Implementation of MatMPIBAIJSetPreallocationCSR():
  preallocates B from a local block-CSR description (ii/jj are in block units, 0-based) and
  inserts the numerical values when V is provided (zeros otherwise, which still fixes the
  nonzero pattern).
*/
static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
{
  PetscInt        m, rstart, cstart, cend;
  PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
  const PetscInt *JJ          = NULL;
  PetscScalar    *values      = NULL;
  PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
  PetscBool       nooffprocentries;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
  PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
  /* local sizes and ownership boundaries in block units */
  m      = B->rmap->n / bs;
  rstart = B->rmap->rstart / bs;
  cstart = B->cmap->rstart / bs;
  cend   = B->cmap->rend / bs;

  PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
  /* count diagonal-part (dlen) and off-diagonal-part (olen) blocks per block row */
  PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
  for (i = 0; i < m; i++) {
    nz = ii[i + 1] - ii[i];
    PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
    nz_max = PetscMax(nz_max, nz); /* longest row, used to size the scratch value buffer below */
    dlen   = 0;
    olen   = 0;
    JJ     = jj + ii[i];
    for (j = 0; j < nz; j++) {
      if (*JJ < cstart || *JJ >= cend) olen++;
      else dlen++;
      JJ++;
    }
    d_nnz[i] = dlen;
    o_nnz[i] = olen;
  }
  PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  /* without user values, insert zeros so the nonzero locations are still established */
  values = (PetscScalar *)V;
  if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
  for (i = 0; i < m; i++) {
    PetscInt        row   = i + rstart;
    PetscInt        ncols = ii[i + 1] - ii[i];
    const PetscInt *icols = jj + ii[i];
    if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
      PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
    } else { /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j = 0; j < ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
        PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
      }
    }
  }

  if (!V) PetscCall(PetscFree(values));
  /* all insertions above are local, so skip the off-process communication during assembly */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* the CSR pattern is exact; adding new nonzero locations later is an error */
  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2652 
2653 /*@C
2654   MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values
2655 
2656   Collective
2657 
2658   Input Parameters:
2659 + B  - the matrix
2660 . bs - the block size
2661 . i  - the indices into `j` for the start of each local row (starts with zero)
2662 . j  - the column indices for each local row (starts with zero) these must be sorted for each row
2663 - v  - optional values in the matrix
2664 
2665   Level: advanced
2666 
2667   Notes:
2668   The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
2669   may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
2670   over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2671   `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2672   block column and the second index is over columns within a block.
2673 
2674   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2675 
2676 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ`
2677 @*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  /* dispatch to the type-specific implementation composed on B; silently a no-op for other types */
  PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2687 
2688 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
2689 {
2690   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
2691   PetscInt     i;
2692   PetscMPIInt  size;
2693 
2694   PetscFunctionBegin;
2695   if (B->hash_active) {
2696     B->ops[0]      = b->cops;
2697     B->hash_active = PETSC_FALSE;
2698   }
2699   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
2700   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
2701   PetscCall(PetscLayoutSetUp(B->rmap));
2702   PetscCall(PetscLayoutSetUp(B->cmap));
2703   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2704 
2705   if (d_nnz) {
2706     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
2707   }
2708   if (o_nnz) {
2709     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
2710   }
2711 
2712   b->bs2 = bs * bs;
2713   b->mbs = B->rmap->n / bs;
2714   b->nbs = B->cmap->n / bs;
2715   b->Mbs = B->rmap->N / bs;
2716   b->Nbs = B->cmap->N / bs;
2717 
2718   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
2719   b->rstartbs = B->rmap->rstart / bs;
2720   b->rendbs   = B->rmap->rend / bs;
2721   b->cstartbs = B->cmap->rstart / bs;
2722   b->cendbs   = B->cmap->rend / bs;
2723 
2724 #if defined(PETSC_USE_CTABLE)
2725   PetscCall(PetscHMapIDestroy(&b->colmap));
2726 #else
2727   PetscCall(PetscFree(b->colmap));
2728 #endif
2729   PetscCall(PetscFree(b->garray));
2730   PetscCall(VecDestroy(&b->lvec));
2731   PetscCall(VecScatterDestroy(&b->Mvctx));
2732 
2733   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2734   PetscCall(MatDestroy(&b->B));
2735   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2736   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2737   PetscCall(MatSetType(b->B, MATSEQBAIJ));
2738 
2739   PetscCall(MatDestroy(&b->A));
2740   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2741   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2742   PetscCall(MatSetType(b->A, MATSEQBAIJ));
2743 
2744   PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
2745   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
2746   B->preallocated  = PETSC_TRUE;
2747   B->was_assembled = PETSC_FALSE;
2748   B->assembled     = PETSC_FALSE;
2749   PetscFunctionReturn(PETSC_SUCCESS);
2750 }
2751 
2752 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec);
2753 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal);
2754 
/*
  MatConvert_MPIBAIJ_MPIAdj - Converts a MATMPIBAIJ matrix to a MATMPIADJ adjacency graph
  in block units (one vertex per block row/column), with diagonal entries removed.
  Ownership of the ii and jj arrays is transferred to the created MPIAdj matrix.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
  Mat_SeqBAIJ    *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data;
  PetscInt        M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  /* count entries per adjacency row: diagonal-part plus off-diagonal-part blocks */
  PetscCall(PetscMalloc1(M + 1, &ii));
  ii[0] = 0;
  for (i = 0; i < M; i++) {
    PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]);
    PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]);
    ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i];
    /* subtract one from the count when this block row contains a diagonal entry */
    for (j = id[i]; j < id[i + 1]; j++) {
      if (jd[j] == i) {
        ii[i + 1]--;
        break;
      }
    }
  }
  /* fill column indices in ascending global order: off-diagonal blocks to the left of the
     owned columns, then the diagonal part (minus the diagonal entry), then the remainder */
  PetscCall(PetscMalloc1(ii[M], &jj));
  cnt = 0;
  for (i = 0; i < M; i++) {
    for (j = io[i]; j < io[i + 1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
    }
    for (k = id[i]; k < id[i + 1]; k++) {
      if (jd[k] != i) jj[cnt++] = rstart + jd[k]; /* translate local to global, skipping the diagonal */
    }
    for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]];
  }
  PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2792 
2793 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2794 
2795 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
2796 
/*
  MatConvert_MPIBAIJ_MPIAIJ - Converts a MATMPIBAIJ matrix to MATMPIAIJ by converting the
  sequential diagonal (a->A) and off-diagonal (a->B) blocks and translating the off-diagonal
  column indices from the compressed local numbering to global point numbering.
  Supports MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX, and MAT_INPLACE_MATRIX.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  Mat_MPIAIJ  *b;
  Mat          B;

  PetscFunctionBegin;
  PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    /* build the target MPIAIJ shell with matching layout; preallocation is replaced below */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetType(B, MATMPIAIJ));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
    PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
  }
  b = (Mat_MPIAIJ *)B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    /* reuse the existing sequential blocks of B as conversion targets */
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
  } else {
    PetscInt   *garray = a->garray;
    Mat_SeqAIJ *bB;
    PetscInt    bs, nnz;
    PetscCall(MatDestroy(&b->A));
    PetscCall(MatDestroy(&b->B));
    /* just clear out the data structure */
    PetscCall(MatDisAssemble_MPIAIJ(B));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));

    /* Global numbering for b->B columns: local column bj*bs+br of block column bj
       maps to global column garray[bj]*bs + br */
    bB  = (Mat_SeqAIJ *)b->B->data;
    bs  = A->rmap->bs;
    nnz = bB->i[A->rmap->n];
    for (PetscInt k = 0; k < nnz; k++) {
      PetscInt bj = bB->j[k] / bs;
      PetscInt br = bB->j[k] % bs;
      bB->j[k]    = garray[bj] * bs + br;
    }
  }
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    /* replace A's guts with B's, preserving A's object identity for the caller */
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2852 
2853 /*MC
2854    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2855 
2856    Options Database Keys:
2857 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
2858 . -mat_block_size <bs> - set the blocksize used to store the matrix
2859 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use  (0 often indicates using BLAS)
2860 - -mat_use_hash_table <fact> - set hash table factor
2861 
2862    Level: beginner
2863 
2864    Note:
    `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
2867 
2868 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ`
2869 M*/
2870 
2871 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);
2872 
/*
  MatCreate_MPIBAIJ - Constructor for MATMPIBAIJ: installs the ops table, initializes the
  Mat_MPIBAIJ data structure, and composes the type-specific methods.  The matrix is not
  usable until a preallocation routine (or MatSetUp()) is called.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ *b;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscNew(&b));
  B->data      = (void *)b;
  B->ops[0]    = MatOps_Values;
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));

  /* build local table of row and column ownerships */
  PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  /* compose conversion and type-specific methods looked up by name elsewhere */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));

  /* optionally enable hash-table assembly; factors <= 1.0 fall back to the default 1.39 */
  PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
  PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
  if (flg) {
    PetscReal fact = 1.39;
    PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
    PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
    if (fact <= 1.0) fact = 1.39;
    PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
    PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
  }
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}
2951 
2952 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2953 /*MC
2954    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2955 
2956    This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
2957    and `MATMPIBAIJ` otherwise.
2958 
2959    Options Database Keys:
2960 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`
2961 
2962   Level: beginner
2963 
2964 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
2965 M*/
2966 
2967 /*@C
2968   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
2969   (block compressed row).
2970 
2971   Collective
2972 
2973   Input Parameters:
2974 + B     - the matrix
2975 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2976           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2977 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2978            submatrix  (same for all local rows)
2979 . d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
2981            row) or `NULL`.  If you plan to factor the matrix you must leave room for the diagonal entry and
2982            set it even if it is zero.
2983 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2984            submatrix (same for all local rows).
2985 - o_nnz - array containing the number of nonzeros in the various block rows of the
2986            off-diagonal portion of the local submatrix (possibly different for
2987            each block row) or `NULL`.
2988 
2989    If the *_nnz parameter is given then the *_nz parameter is ignored
2990 
2991   Options Database Keys:
2992 + -mat_block_size            - size of the blocks to use
2993 - -mat_use_hash_table <fact> - set hash table factor
2994 
2995   Level: intermediate
2996 
2997   Notes:
2998   For good matrix assembly performance
2999   the user should preallocate the matrix storage by setting the parameters
3000   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3001   performance can be increased by more than a factor of 50.
3002 
3003   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
  then it must be used on all processors that share the object for that argument.
3005 
3006   Storage Information:
3007   For a square global matrix we define each processor's diagonal portion
3008   to be its local rows and the corresponding columns (a square submatrix);
3009   each processor's off-diagonal portion encompasses the remainder of the
3010   local matrix (a rectangular submatrix).
3011 
3012   The user can specify preallocated storage for the diagonal part of
3013   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
3014   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3015   memory allocation.  Likewise, specify preallocated storage for the
3016   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3017 
3018   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3019   the figure below we depict these three local rows and all columns (0-11).
3020 
3021 .vb
3022            0 1 2 3 4 5 6 7 8 9 10 11
3023           --------------------------
3024    row 3  |o o o d d d o o o o  o  o
3025    row 4  |o o o d d d o o o o  o  o
3026    row 5  |o o o d d d o o o o  o  o
3027           --------------------------
3028 .ve
3029 
3030   Thus, any entries in the d locations are stored in the d (diagonal)
3031   submatrix, and any entries in the o locations are stored in the
3032   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3033   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3034 
3035   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3036   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3037   In general, for PDE problems in which most nonzeros are near the diagonal,
3038   one expects `d_nz` >> `o_nz`.
3039 
3040   You can call `MatGetInfo()` to get information on how effective the preallocation was;
3041   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3042   You can also run with the option `-info` and look for messages with the string
3043   malloc in them to see if additional memory allocation was needed.
3044 
3045 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
3046 @*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2); /* bs must agree across all ranks sharing B */
  /* Dispatch to the type-specific implementation; PetscTryMethod() is a no-op when the
     matrix type does not compose "MatMPIBAIJSetPreallocation_C" (e.g. MATSEQBAIJ) */
  PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3056 
3057 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
3058 /*@C
3059   MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
3060   (block compressed row).
3061 
3062   Collective
3063 
3064   Input Parameters:
3065 + comm  - MPI communicator
3066 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3067           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3068 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
3069            This value should be the same as the local size used in creating the
3070            y vector for the matrix-vector product y = Ax.
3071 . n     - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
3072            This value should be the same as the local size used in creating the
3073            x vector for the matrix-vector product y = Ax.
3074 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3075 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3076 . d_nz  - number of nonzero blocks per block row in diagonal portion of local
3077            submatrix  (same for all local rows)
3078 . d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
3080            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3081            and set it even if it is zero.
3082 . o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3083            submatrix (same for all local rows).
3084 - o_nnz - array containing the number of nonzero blocks in the various block rows of the
3085            off-diagonal portion of the local submatrix (possibly different for
3086            each block row) or NULL.
3087 
3088   Output Parameter:
3089 . A - the matrix
3090 
3091   Options Database Keys:
3092 + -mat_block_size            - size of the blocks to use
3093 - -mat_use_hash_table <fact> - set hash table factor
3094 
3095   Level: intermediate
3096 
3097   Notes:
3098   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3099   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3100   [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
3101 
3102   For good matrix assembly performance
3103   the user should preallocate the matrix storage by setting the parameters
3104   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3105   performance can be increased by more than a factor of 50.
3106 
3107   If the *_nnz parameter is given then the *_nz parameter is ignored
3108 
  A nonzero block is any block that has 1 or more nonzeros in it
3110 
3111   The user MUST specify either the local or global matrix dimensions
3112   (possibly both).
3113 
3114   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
  then it must be used on all processors that share the object for that argument.
3116 
3117   Storage Information:
3118   For a square global matrix we define each processor's diagonal portion
3119   to be its local rows and the corresponding columns (a square submatrix);
3120   each processor's off-diagonal portion encompasses the remainder of the
3121   local matrix (a rectangular submatrix).
3122 
3123   The user can specify preallocated storage for the diagonal part of
3124   the local submatrix with either d_nz or d_nnz (not both).  Set
3125   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3126   memory allocation.  Likewise, specify preallocated storage for the
3127   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3128 
3129   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3130   the figure below we depict these three local rows and all columns (0-11).
3131 
3132 .vb
3133            0 1 2 3 4 5 6 7 8 9 10 11
3134           --------------------------
3135    row 3  |o o o d d d o o o o  o  o
3136    row 4  |o o o d d d o o o o  o  o
3137    row 5  |o o o d d d o o o o  o  o
3138           --------------------------
3139 .ve
3140 
3141   Thus, any entries in the d locations are stored in the d (diagonal)
3142   submatrix, and any entries in the o locations are stored in the
3143   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3144   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3145 
3146   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3147   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3148   In general, for PDE problems in which most nonzeros are near the diagonal,
3149   one expects `d_nz` >> `o_nz`.
3150 
3151 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
3152 @*/
3153 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
3154 {
3155   PetscMPIInt size;
3156 
3157   PetscFunctionBegin;
3158   PetscCall(MatCreate(comm, A));
3159   PetscCall(MatSetSizes(*A, m, n, M, N));
3160   PetscCallMPI(MPI_Comm_size(comm, &size));
3161   if (size > 1) {
3162     PetscCall(MatSetType(*A, MATMPIBAIJ));
3163     PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
3164   } else {
3165     PetscCall(MatSetType(*A, MATSEQBAIJ));
3166     PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
3167   }
3168   PetscFunctionReturn(PETSC_SUCCESS);
3169 }
3170 
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat          mat;
  Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
  PetscInt     len = 0;

  PetscFunctionBegin;
  *newmat = NULL;
  /* Create an empty matrix with the same type, sizes, and layouts as matin */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));

  /* Share the row/column layouts by reference rather than copying them */
  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
  if (matin->hash_active) {
    /* matin is still in hash-based MatSetValues mode (never assembled); the
       duplicate only needs a plain MatSetUp() */
    PetscCall(MatSetUp(mat));
  } else {
    mat->factortype   = matin->factortype;
    mat->preallocated = PETSC_TRUE;
    mat->assembled    = PETSC_TRUE;
    mat->insertmode   = NOT_SET_VALUES;

    /* Copy the scalar bookkeeping fields of the parallel wrapper */
    a             = (Mat_MPIBAIJ *)mat->data;
    mat->rmap->bs = matin->rmap->bs;
    a->bs2        = oldmat->bs2;
    a->mbs        = oldmat->mbs;
    a->nbs        = oldmat->nbs;
    a->Mbs        = oldmat->Mbs;
    a->Nbs        = oldmat->Nbs;

    a->size         = oldmat->size;
    a->rank         = oldmat->rank;
    a->donotstash   = oldmat->donotstash;
    a->roworiented  = oldmat->roworiented;
    a->rowindices   = NULL; /* MatGetRow() workspace is rebuilt on demand */
    a->rowvalues    = NULL;
    a->getrowactive = PETSC_FALSE;
    a->barray       = NULL;
    a->rstartbs     = oldmat->rstartbs;
    a->rendbs       = oldmat->rendbs;
    a->cstartbs     = oldmat->cstartbs;
    a->cendbs       = oldmat->cendbs;

    /* hash table stuff: configuration is inherited, counters and storage restart */
    a->ht           = NULL;
    a->hd           = NULL;
    a->ht_size      = 0;
    a->ht_flag      = oldmat->ht_flag;
    a->ht_fact      = oldmat->ht_fact;
    a->ht_total_ct  = 0;
    a->ht_insert_ct = 0;

    PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
    /* Deep-copy the global-to-local column map, if it has been built */
    if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
      PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
      PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
      PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
#endif
    } else a->colmap = NULL;

    /* Copy the global block-column indices of the off-diagonal part (length =
       number of block columns of oldmat->B) */
    if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
      PetscCall(PetscMalloc1(len, &a->garray));
      PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
    } else a->garray = NULL;

    /* Fresh block stash; communication objects are duplicated from the original */
    PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
    PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
    PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));

    /* Duplicate the sequential diagonal (A) and off-diagonal (B) blocks */
    PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
    PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  }
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
3249 
3250 /* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
  PetscInt    *rowidxs, *colidxs, rs, cs, ce;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header: [classid, rows, cols, nonzeros] */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  /* a negative nz marks a special on-disk format that this reader cannot handle */
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set local sizes if not set already (square matrices only: mirror the other map) */
  if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
  if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetLocalSize(mat, &m, &n));
  PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
  mbs = m / bs; /* number of local block rows */
  nbs = n / bs; /* number of local block columns */

  /* read in row lengths and build row indices (prefix sum turns lengths into CSR offsets) */
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  /* sanity check: the global sum of row lengths must equal the header's nonzero count */
  PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));

  {                /* preallocate matrix storage */
    PetscBT    bt; /* helper bit set to count diagonal nonzeros */
    PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
    PetscBool  sbaij, done;
    PetscInt  *d_nnz, *o_nnz;

    PetscCall(PetscBTCreate(nbs, &bt));
    PetscCall(PetscHSetICreate(&ht));
    PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
    PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
    /* for each block row, count the distinct diagonal/off-diagonal block columns hit */
    for (i = 0; i < mbs; i++) {
      PetscCall(PetscBTMemzero(nbs, bt));
      PetscCall(PetscHSetIClear(ht));
      for (k = 0; k < bs; k++) {
        PetscInt row = bs * i + k;
        for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
          PetscInt col = colidxs[j];
          /* SBAIJ only stores col >= row, so skip strictly-lower entries there */
          if (!sbaij || col >= row) {
            if (col >= cs && col < ce) {
              /* diagonal portion: bit set dedupes block columns within this block row */
              if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
            } else {
              /* off-diagonal portion: hash set dedupes global block columns */
              PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
              if (done) o_nnz[i]++;
            }
          }
        }
      }
    }
    PetscCall(PetscBTDestroy(&bt));
    PetscCall(PetscHSetIDestroy(&ht));
    /* both are invoked since this routine serves BAIJ and SBAIJ; each call is
       dispatched via PetscTryMethod so the one not matching mat's type is a no-op */
    PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree2(d_nnz, o_nnz));
  }

  /* store matrix values, one local (scalar) row at a time */
  for (i = 0; i < m; i++) {
    PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
    PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
  }

  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3351 
3352 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
3353 {
3354   PetscBool isbinary;
3355 
3356   PetscFunctionBegin;
3357   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3358   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3359   PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
3360   PetscFunctionReturn(PETSC_SUCCESS);
3361 }
3362 
3363 /*@
3364   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table
3365 
3366   Input Parameters:
3367 + mat  - the matrix
3368 - fact - factor
3369 
3370   Options Database Key:
3371 . -mat_use_hash_table <fact> - provide the factor
3372 
3373   Level: advanced
3374 
3375 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()`
3376 @*/
3377 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact)
3378 {
3379   PetscFunctionBegin;
3380   PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
3381   PetscFunctionReturn(PETSC_SUCCESS);
3382 }
3383 
3384 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact)
3385 {
3386   Mat_MPIBAIJ *baij;
3387 
3388   PetscFunctionBegin;
3389   baij          = (Mat_MPIBAIJ *)mat->data;
3390   baij->ht_fact = fact;
3391   PetscFunctionReturn(PETSC_SUCCESS);
3392 }
3393 
3394 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
3395 {
3396   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3397   PetscBool    flg;
3398 
3399   PetscFunctionBegin;
3400   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
3401   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
3402   if (Ad) *Ad = a->A;
3403   if (Ao) *Ao = a->B;
3404   if (colmap) *colmap = a->garray;
3405   PetscFunctionReturn(PETSC_SUCCESS);
3406 }
3407 
3408 /*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
3410 */
3411 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3412   #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3413 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3414   #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3415 #endif
3416 
3417 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name
3418 /*@C
3419   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`
3420 
3421   Collective
3422 
3423   Input Parameters:
3424 + matin  - the matrix
3425 . min    - number of input rows
3426 . im     - input rows
3427 . nin    - number of input columns
3428 . in     - input columns
3429 . v      - numerical values input
3430 - addvin - `INSERT_VALUES` or `ADD_VALUES`
3431 
3432   Level: advanced
3433 
3434   Developer Notes:
3435   This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.
3436 
3437 .seealso: `Mat`, `MatSetValuesBlocked()`
3438 @*/
PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
{
  /* convert input arguments to C version (Fortran passes everything by reference) */
  Mat        mat = *matin;
  PetscInt   m = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray;
  PetscBool        roworiented = baij->roworiented;
  PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));

  /* lazily allocate the bs2-sized scratch block used when a copy is required */
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* stride (in scalars) to step between rows of one block inside v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative block-row indices are silently skipped */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* block row is owned by this rank */
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2; /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* block column falls in the diagonal (local) portion */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue; /* negative block-column indices are silently skipped */
        } else {
          /* off-diagonal portion: map the global block column through colmap */
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
  #endif
#endif
/* colmap stores 1-based scalar offsets, hence the -1 and division by bs */
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = (col - 1) / bs;
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            /* unknown column: disassemble so new off-diagonal structure can be inserted */
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      /* block row owned by another rank: stash the values for exchange at assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3546 
3547 /*@
3548   MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block
3549   CSR format the local rows.
3550 
3551   Collective
3552 
3553   Input Parameters:
3554 + comm - MPI communicator
3555 . bs   - the block size, only a block size of 1 is supported
3556 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
3557 . n    - This value should be the same as the local size used in creating the
3558        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
3559        calculated if N is given) For square matrices n is almost always m.
3560 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3561 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3562 . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
3563 . j    - column indices
3564 - a    - matrix values
3565 
3566   Output Parameter:
3567 . mat - the matrix
3568 
3569   Level: intermediate
3570 
3571   Notes:
3572   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
3573   thus you CANNOT change the matrix entries by changing the values of a[] after you have
3574   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3575 
3576   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3577   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3578   block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3579   with column-major ordering within blocks.
3580 
3581   The `i` and `j` indices are 0 based, and i indices are indices corresponding to the local `j` array.
3582 
3583 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
3584           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
3585 @*/
3586 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
3587 {
3588   PetscFunctionBegin;
3589   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3590   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
3591   PetscCall(MatCreate(comm, mat));
3592   PetscCall(MatSetSizes(*mat, m, n, M, N));
3593   PetscCall(MatSetType(*mat, MATMPIBAIJ));
3594   PetscCall(MatSetBlockSize(*mat, bs));
3595   PetscCall(MatSetUp(*mat));
3596   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
3597   PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
3598   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
3599   PetscFunctionReturn(PETSC_SUCCESS);
3600 }
3601 
3602 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3603 {
3604   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
3605   PetscInt    *indx;
3606   PetscScalar *values;
3607 
3608   PetscFunctionBegin;
3609   PetscCall(MatGetSize(inmat, &m, &N));
3610   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3611     Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data;
3612     PetscInt    *dnz, *onz, mbs, Nbs, nbs;
3613     PetscInt    *bindx, rmax = a->rmax, j;
3614     PetscMPIInt  rank, size;
3615 
3616     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3617     mbs = m / bs;
3618     Nbs = N / cbs;
3619     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
3620     nbs = n / cbs;
3621 
3622     PetscCall(PetscMalloc1(rmax, &bindx));
3623     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
3624 
3625     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3626     PetscCallMPI(MPI_Comm_rank(comm, &size));
3627     if (rank == size - 1) {
3628       /* Check sum(nbs) = Nbs */
3629       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
3630     }
3631 
3632     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
3633     for (i = 0; i < mbs; i++) {
3634       PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
3635       nnz = nnz / bs;
3636       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
3637       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
3638       PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL));
3639     }
3640     PetscCall(PetscFree(bindx));
3641 
3642     PetscCall(MatCreate(comm, outmat));
3643     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
3644     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
3645     PetscCall(MatSetType(*outmat, MATBAIJ));
3646     PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz));
3647     PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
3648     MatPreallocateEnd(dnz, onz);
3649     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3650   }
3651 
3652   /* numeric phase */
3653   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3654   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
3655 
3656   for (i = 0; i < m; i++) {
3657     PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3658     Ii = i + rstart;
3659     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
3660     PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3661   }
3662   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
3663   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
3664   PetscFunctionReturn(PETSC_SUCCESS);
3665 }
3666