xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision b8d709ab44c354dba7591c0191c0984ef3bb0cd5)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I  "petscmat.h"  I*/
2 
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 #if defined(PETSC_HAVE_HYPRE)
8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
9 #endif
10 
11 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[])
12 {
13   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
14   PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
15   PetscScalar       *va, *vv;
16   Vec                vB, vA;
17   const PetscScalar *vb;
18 
19   PetscFunctionBegin;
20   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
21   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
22 
23   PetscCall(VecGetArrayWrite(vA, &va));
24   if (idx) {
25     for (i = 0; i < m; i++) {
26       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
27     }
28   }
29 
30   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
31   PetscCall(PetscMalloc1(m, &idxb));
32   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
33 
34   PetscCall(VecGetArrayWrite(v, &vv));
35   PetscCall(VecGetArrayRead(vB, &vb));
36   for (i = 0; i < m; i++) {
37     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
38       vv[i] = vb[i];
39       if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
40     } else {
41       vv[i] = va[i];
42       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
43     }
44   }
45   PetscCall(VecRestoreArrayWrite(vA, &vv));
46   PetscCall(VecRestoreArrayWrite(vA, &va));
47   PetscCall(VecRestoreArrayRead(vB, &vb));
48   PetscCall(PetscFree(idxb));
49   PetscCall(VecDestroy(&vA));
50   PetscCall(VecDestroy(&vB));
51   PetscFunctionReturn(0);
52 }
53 
54 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
55 {
56   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
57 
58   PetscFunctionBegin;
59   PetscCall(MatStoreValues(aij->A));
60   PetscCall(MatStoreValues(aij->B));
61   PetscFunctionReturn(0);
62 }
63 
64 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
65 {
66   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
67 
68   PetscFunctionBegin;
69   PetscCall(MatRetrieveValues(aij->A));
70   PetscCall(MatRetrieveValues(aij->B));
71   PetscFunctionReturn(0);
72 }
73 
74 /*
75      Local utility routine that creates a mapping from the global column
76    number to the local number in the off-diagonal part of the local
77    storage of the matrix.  This is done in a non scalable way since the
78    length of colmap equals the global matrix length.
79 */
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     nbs = B->nbs, i, bs = mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* scalable variant: hash table keyed by global block column + 1
     (the +1 offset lets a lookup result of 0 mean "not present") */
  PetscCall(PetscTableCreate(baij->nbs, baij->Nbs + 1, &baij->colmap));
  for (i = 0; i < nbs; i++) PetscCall(PetscTableAdd(baij->colmap, baij->garray[i] + 1, i * bs + 1, INSERT_VALUES));
#else
  /* dense variant: array of length Nbs+1 (global number of block columns);
     entries are stored shifted by +1 so 0 marks an absent column */
  PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
  for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
#endif
  PetscFunctionReturn(0);
}
96 
/*
   Inserts/adds one scalar at point indices (row,col) into the diagonal block A.
   Locates block row `brow` = row/bs, narrows the search for block column `bcol`
   with a short binary search followed by a linear scan, and writes into the
   bs x bs block at offset (cidx,ridx).  If the block column is absent and
   a->nonew allows insertion, the row is reallocated (if needed) and later
   entries are shifted up.  Relies on the working variables (rp, ap, nrow, ...)
   declared in MatSetValues_MPIBAIJ().
*/
#define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    brow = row / bs; \
    rp   = aj + ai[brow]; \
    ap   = aa + bs2 * ai[brow]; \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  }
136 
/*
   Same as MatSetValues_SeqBAIJ_A_Private() but targets the off-diagonal block B
   (arrays bj/bi/ba/bimax/bilen, insertion policy b->nonew).  `col` here is the
   B-local point column computed from the colmap by the caller.
*/
#define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    brow = row / bs; \
    rp   = bj + bi[brow]; \
    ap   = ba + bs2 * bi[brow]; \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column  (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  }
176 
/*
   MatSetValues_MPIBAIJ - Inserts or adds values given in point (non-blocked)
   indexing.  Locally owned rows go into the diagonal block A or the
   off-diagonal block B via the macros above; off-process rows are stashed for
   communication during assembly.  Negative row/column indices are ignored.
*/
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  MatScalar    value;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)(A)->data;
  PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar   *aa = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba = b->a;

  /* working variables consumed by MatSetValues_SeqBAIJ_{A,B}_Private() */
  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are silently skipped */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* row owned by this process */
      row = im[i] - rstart_orig;
      for (j = 0; j < n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          /* column in the diagonal block */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            /* map the global column to a B-local column through the colmap */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap, in[j] / bs + 1, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
              /* column not present yet: fall back to global numbering in B */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)(B)->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else {
              PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
              col += in[j] % bs; /* add the point offset within the block */
            }
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else { /* off-process row: stash for assembly-time communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
262 
/*
   MatSetValuesBlocked_SeqBAIJ_Inlined - Inserts or adds one bs x bs block of
   values v at block indices (row,col) of a SeqBAIJ matrix.  `v` is read in
   row-major or column-major order depending on a->roworiented; storage inside
   the matrix is column-major per block.  (orow,ocol) are the original global
   block indices, used only for error messages.
*/
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];       /* block-column indices of this block row */
  ap    = aa + bs2 * ai[row]; /* values of this block row */
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;
  /* binary search narrows the range, then a short linear scan finds col */
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* block already present: accumulate or overwrite in place */
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2; /* silently ignore new blocks */
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
339 
340 /*
341     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
342     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
343 */
/*
   MatSetValuesBlocked_MPIBAIJ - Inserts or adds m x n blocks of values (block
   indexing).  Each bs x bs block destined for a locally owned block row is
   copied (when necessary) into the contiguous scratch buffer baij->barray and
   handed to MatSetValuesBlocked_SeqBAIJ_Inlined() on the diagonal (A) or
   off-diagonal (B) part; off-process block rows are stashed for assembly.
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ *)mat->data;
  const PetscScalar *value;
  MatScalar         *barray      = baij->barray;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the bs2-sized scratch block buffer */
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* stride (in scalars) between consecutive rows/cols of one block inside v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative block rows are silently skipped */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) { /* block row owned locally */
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i * (stepval + bs) + j) * bs;
          } else {
            value = v + (j * (stepval + bs) + i) * bs;
          }
          for (ii = 0; ii < bs; ii++, value += bs + stepval) {
            for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2; /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* block column lies in the diagonal part */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
  #endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &col));
            col = (col - 1) / bs; /* convert colmap's point offset to a block index */
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
              /* block column not present yet: revert B to global numbering */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else { /* off-process block row: stash for later communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
435 
/* Multiplicative hashing: HASH_KEY is the golden-ratio conjugate (1/phi);
   HASH maps `key` into [0,size) via the fractional part of key*HASH_KEY,
   using `tmp` as scratch to avoid evaluating the product twice. */
#define HASH_KEY             0.6180339887
#define HASH(size, key, tmp) (tmp = (key)*HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/*
   MatSetValues_MPIBAIJ_HT - Point-indexed MatSetValues using the hash table
   built by MatCreateHashTable_MPIBAIJ_Private().  Only entries whose
   (block row, block col) already have a slot in the table may be set; the
   table maps each key to the address of the corresponding bs x bs block.
   Off-process rows are stashed as usual.
*/
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
  PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
  PetscReal    tmp;
  MatScalar  **HD       = baij->hd, value;
  PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) { /* locally owned row */
      for (j = 0; j < n; j++) {
        col = in[j];
        if (roworiented) value = v[i * n + j];
        else value = v[i + j * m];
        /* Look up the (block row, block col) key in the hash table */
        key = (row / bs) * Nbs + (col / bs) + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          /* debug path also gathers probe-count statistics */
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            /* linear probing, wrapping around the table once */
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
              ;
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
                ;
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
            ;
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
              ;
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
        else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
      }
    } else if (!baij->donotstash) { /* off-process row */
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(0);
}
508 
/*
   MatSetValuesBlocked_MPIBAIJ_HT - Block-indexed MatSetValuesBlocked using the
   precomputed hash table: each (block row, block col) key resolves directly to
   the address of its bs x bs block, into which the supplied values are copied
   or accumulated.  Off-process block rows are stashed for assembly.
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col;
  PetscInt           rstart = baij->rstartbs;
  PetscInt           rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
  PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
  PetscReal          tmp;
  MatScalar        **HD = baij->hd, *baij_a;
  const PetscScalar *v_t, *value;
  PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  /* stride (in scalars) between consecutive rows/cols of one block inside v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
      PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    }
    row = im[i];
    v_t = v + i * nbs2;
    if (row >= rstart && row < rend) { /* locally owned block row */
      for (j = 0; j < n; j++) {
        col = in[j];

        /* Look up into the hash table */
        key = row * Nbs + col + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          /* debug path also gathers probe-count statistics */
          total_ct++;
          insert_ct++;
          if (HT[idx] != key) {
            /* linear probing, wrapping around the table once */
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
              ;
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
                ;
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
            ;
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
              ;
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        baij_a = HD[idx]; /* address of the bs x bs block to modify */
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t += bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
            }
          }
        } else {
          value = v + j * (stepval + bs) * bs + i * bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
            }
          }
        }
      }
    } else { /* off-process block row */
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(0);
}
607 
/*
   MatGetValues_MPIBAIJ - Retrieves values at global point indices
   (idxm x idxn) into v (row-major).  Only locally owned rows are supported;
   columns outside both the diagonal part and the stored off-diagonal columns
   yield 0.0.  Negative indices are skipped (v is left untouched there).
*/
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          /* column lies in the diagonal block */
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          /* column lies in the off-diagonal block: translate via colmap */
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(baij->colmap, idxn[j] / bs + 1, &data));
          data--;
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          /* absent column (or stale colmap slot): the value is an implicit zero */
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
645 
/*
   MatNorm_MPIBAIJ - Computes the Frobenius norm, 1-norm (max column sum of
   absolute values), or infinity norm (max row sum) of an MPIBAIJ matrix.
   On a single process it delegates to the sequential implementation; otherwise
   local contributions from the A and B blocks are combined with an
   MPI_Allreduce.  Note the 1-norm path allocates O(global columns) work space.
*/
PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
  PetscReal    sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    PetscCall(MatNorm(baij->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both blocks, reduce, then take the square root */
      v  = amat->a;
      nz = amat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v  = bmat->a;
      nz = bmat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = baij->garray, cstart = baij->rstartbs;
      /* tmp accumulates local column sums over all global columns */
      PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N, &tmp2));
      v  = amat->a;
      jj = amat->j;
      for (i = 0; i < amat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * (cstart + *jj) + j; /* column index */
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      v  = bmat->a;
      jj = bmat->j;
      for (i = 0; i < bmat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * garray[*jj] + j; /* off-diagonal block: global column via garray */
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = 0.0;
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      PetscCall(PetscMalloc1(bs, &sums));
      sum = 0.0;
      /* one pass per block row; sums[] holds the bs point-row sums within it */
      for (j = 0; j < amat->mbs; j++) {
        for (row = 0; row < bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2 * amat->i[j];
        nz = amat->i[j + 1] - amat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        v  = bmat->a + bs2 * bmat->i[j];
        nz = bmat->i[j + 1] - bmat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        for (row = 0; row < bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscFree(sums));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
745 
746 /*
747   Creates the hash table, and sets the table
748   This table is created only once.
749   If new entries need to be added to the matrix
750   then the hash table has to be destroyed and
751   recreated.
752 */
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          A = baij->A, B = baij->B;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
  PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
  PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
  PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
  PetscInt    *HT, key;
  MatScalar  **HD;
  PetscReal    tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct = 0, max = 0; /* collision count and longest probe sequence, for diagnostics only */
#endif

  PetscFunctionBegin;
  /* The table is built at most once; bail out if it already exists */
  if (baij->ht) PetscFunctionReturn(0);

  /* Table capacity is the total block-nonzero count scaled by the user factor (>1 leaves slack) */
  baij->ht_size = (PetscInt)(factor * nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table: HD holds pointers to each bs2 block of values, HT the keys */
  PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
  HD = baij->hd;
  HT = baij->ht;

  /* Loop Over A: insert every diagonal-part block using its global (row,col) as key.
     Open addressing with linear probing; key is row*Nbs+col+1 so that 0 means "empty slot". */
  for (i = 0; i < a->mbs; i++) {
    for (j = ai[i]; j < ai[i + 1]; j++) {
      row = i + rstart;
      col = aj[j] + cstart;

      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = a->a + j * bs2; /* address of this block's values */
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B: same insertion for the off-diagonal part; garray maps local block
     columns of B to global block columns */
  for (i = 0; i < b->mbs; i++) {
    for (j = bi[i]; j < bi[i + 1]; j++) {
      row = i + rstart;
      col = garray[bj[j]];
      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = b->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary: average probes per occupied slot and the worst-case probe length */
#if defined(PETSC_USE_INFO)
  for (i = 0, j = 0; i < ht_size; i++) {
    if (HT[i]) j++;
  }
  PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max));
#endif
  PetscFunctionReturn(0);
}
836 
837 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
838 {
839   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
840   PetscInt     nstash, reallocs;
841 
842   PetscFunctionBegin;
843   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
844 
845   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
846   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
847   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
848   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
849   PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
850   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
851   PetscFunctionReturn(0);
852 }
853 
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
  PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt    *row, *col;
  PetscBool    r1, r2, r3, other_disassembled;
  MatScalar   *val;
  PetscMPIInt  n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* Drain the point stash: receive messages until none remain */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ *)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* each stashed block carries bs2 values, hence the i*bs2 offset */
        PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    /* restore the original orientation flags saved above */
    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
  }

  /* first final assembly also builds the scatter context for matrix-vector products */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* lazily build the hash table once the nonzero pattern is final, then switch
     MatSetValues to the hash-table fast path */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* release the MatGetRow work arrays; they are re-allocated on demand */
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(0);
}
962 
963 extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer);
964 #include <petscdraw.h>
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ      *baij = (Mat_MPIBAIJ *)mat->data;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    /* the INFO formats print summary data only and return early */
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, (double)info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    /* nothing to draw on a null draw context */
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqBAIJ *Aloc;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      /* rank 0 owns all rows/columns of the gathered copy */
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      /* rvals holds the bs consecutive global scalar rows of block row i */
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          /* insert one column of the bs x bs block; 'a' advances through the
             column-oriented block values */
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part (off-diagonal blocks; garray maps to global block columns) */
    Aloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      /* only rank 0 holds data; view its local (diagonal) part of the gathered matrix */
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)(A->data))->A, matname));
      PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}
1086 
1087 /* Used for both MPIBAIJ and MPISBAIJ matrices */
/* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
  Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt        header[4], M, N, m, rs, cs, bs, nz, cnt, i, j, ja, jb, k, l;
  PetscInt       *rowlens, *colidxs;
  PetscScalar    *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs * bs * (A->nz + B->nz); /* local scalar nonzero count (block count times bs^2) */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  header[3] = nz;
  /* header[3] on rank 0 becomes the global nonzero count */
  PetscCallMPI(MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths: each of the bs scalar rows in block row i
     has the same length, bs times the block count in that row */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (cnt = 0, i = 0; i < A->mbs; i++)
    for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices, in ascending global order per scalar row:
     B blocks left of the diagonal block first, then A (diagonal) blocks, then
     the remaining B blocks to the right */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break; /* reached blocks right of the diagonal part */
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values in the same ordering as the column indices;
     block values are stored column-oriented, hence the bs*(bs*jb+l)+k indexing
     to pull scalar row k of each block */
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
    }
  }
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(0);
}
1163 
1164 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
1165 {
1166   PetscBool iascii, isdraw, issocket, isbinary;
1167 
1168   PetscFunctionBegin;
1169   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1170   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1171   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1172   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1173   if (iascii || isdraw || issocket) {
1174     PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
1175   } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
1176   PetscFunctionReturn(0);
1177 }
1178 
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
#endif
  /* release the stashes and the two sequential submatrices */
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
  /* colmap is either a PetscTable or a plain array depending on the build */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree2(baij->hd, baij->ht)); /* hash-table storage, if it was ever built */
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  /* remove the composed methods so a later type change starts clean */
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
  PetscFunctionReturn(0);
}
1221 
1222 PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
1223 {
1224   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1225   PetscInt     nt;
1226 
1227   PetscFunctionBegin;
1228   PetscCall(VecGetLocalSize(xx, &nt));
1229   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
1230   PetscCall(VecGetLocalSize(yy, &nt));
1231   PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy");
1232   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1233   PetscCall((*a->A->ops->mult)(a->A, xx, yy));
1234   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1235   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
1236   PetscFunctionReturn(0);
1237 }
1238 
1239 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1240 {
1241   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1242 
1243   PetscFunctionBegin;
1244   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1245   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1246   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1247   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1248   PetscFunctionReturn(0);
1249 }
1250 
1251 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
1252 {
1253   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1254 
1255   PetscFunctionBegin;
1256   /* do nondiagonal part */
1257   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1258   /* do local part */
1259   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1260   /* add partial results together */
1261   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1262   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1263   PetscFunctionReturn(0);
1264 }
1265 
1266 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1267 {
1268   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1269 
1270   PetscFunctionBegin;
1271   /* do nondiagonal part */
1272   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1273   /* do local part */
1274   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1275   /* add partial results together */
1276   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1277   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1278   PetscFunctionReturn(0);
1279 }
1280 
1281 /*
1282   This only works correctly for square matrices where the subblock A->A is the
1283    diagonal block
1284 */
1285 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
1286 {
1287   PetscFunctionBegin;
1288   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1289   PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
1290   PetscFunctionReturn(0);
1291 }
1292 
1293 PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
1294 {
1295   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1296 
1297   PetscFunctionBegin;
1298   PetscCall(MatScale(a->A, aa));
1299   PetscCall(MatScale(a->B, aa));
1300   PetscFunctionReturn(0);
1301 }
1302 
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;

  PetscFunctionBegin;
  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt     max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      /* combined A+B block count of block row i */
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }
  lrow = row - brstart; /* local scalar row index */

  /* pass NULL for whatever the caller did not request */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global block column lies left of the
         diagonal part; the merged order is B[0..imark), all of A, B[imark..nzB) */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already determined while copying values */
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          /* values were not requested, so find imark here */
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(0);
}
1385 
1386 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1387 {
1388   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1389 
1390   PetscFunctionBegin;
1391   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
1392   baij->getrowactive = PETSC_FALSE;
1393   PetscFunctionReturn(0);
1394 }
1395 
1396 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1397 {
1398   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1399 
1400   PetscFunctionBegin;
1401   PetscCall(MatZeroEntries(l->A));
1402   PetscCall(MatZeroEntries(l->B));
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1407 {
1408   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
1409   Mat            A = a->A, B = a->B;
1410   PetscLogDouble isend[5], irecv[5];
1411 
1412   PetscFunctionBegin;
1413   info->block_size = (PetscReal)matin->rmap->bs;
1414 
1415   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1416 
1417   isend[0] = info->nz_used;
1418   isend[1] = info->nz_allocated;
1419   isend[2] = info->nz_unneeded;
1420   isend[3] = info->memory;
1421   isend[4] = info->mallocs;
1422 
1423   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1424 
1425   isend[0] += info->nz_used;
1426   isend[1] += info->nz_allocated;
1427   isend[2] += info->nz_unneeded;
1428   isend[3] += info->memory;
1429   isend[4] += info->mallocs;
1430 
1431   if (flag == MAT_LOCAL) {
1432     info->nz_used      = isend[0];
1433     info->nz_allocated = isend[1];
1434     info->nz_unneeded  = isend[2];
1435     info->memory       = isend[3];
1436     info->mallocs      = isend[4];
1437   } else if (flag == MAT_GLOBAL_MAX) {
1438     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1439 
1440     info->nz_used      = irecv[0];
1441     info->nz_allocated = irecv[1];
1442     info->nz_unneeded  = irecv[2];
1443     info->memory       = irecv[3];
1444     info->mallocs      = irecv[4];
1445   } else if (flag == MAT_GLOBAL_SUM) {
1446     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1447 
1448     info->nz_used      = irecv[0];
1449     info->nz_allocated = irecv[1];
1450     info->nz_unneeded  = irecv[2];
1451     info->memory       = irecv[3];
1452     info->mallocs      = irecv[4];
1453   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1454   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1455   info->fill_ratio_needed = 0;
1456   info->factor_mallocs    = 0;
1457   PetscFunctionReturn(0);
1458 }
1459 
PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* options that are simply forwarded to both local submatrices */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    /* recorded here as well, so stash assembly can save/restore it */
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    /* hash table is built lazily at final assembly; 1.39 is the default fill factor */
    a->ht_flag = flg;
    a->ht_fact = 1.39;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(0);
}
1507 
PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *Aloc;
  Mat          B;
  PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
  PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
  MatScalar   *a;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* build the result with transposed sizes; entries are inserted one block
       column at a time below */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    /* Do not know preallocation information, but must set block size */
    PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ *)baij->A->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  PetscCall(PetscMalloc1(bs, &rvals));

  for (i = 0; i < mbs; i++) {
    /* rvals holds the bs consecutive global scalar rows of block row i of A;
       inserting them as COLUMNS of B (1 row, bs cols per call) transposes */
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      col = (baij->cstartbs + aj[j]) * bs;
      for (k = 0; k < bs; k++) {
        /* 'a' walks through the block's values one column (bs scalars) at a time */
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));

        col++;
        a += bs;
      }
    }
  }
  /* copy over the B part (off-diagonal; garray maps to global block columns) */
  Aloc = (Mat_SeqBAIJ *)baij->B->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      col = baij->garray[aj[j]] * bs;
      for (k = 0; k < bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
        col++;
        a += bs;
      }
    }
  }
  PetscCall(PetscFree(rvals));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  /* for in-place transpose, fold the new matrix back into A */
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else PetscCall(MatHeaderMerge(A, &B));
  PetscFunctionReturn(0);
}
1574 
/*
  MatDiagonalScale_MPIBAIJ - Computes mat = diag(ll)*mat*diag(rr).

  Either ll or rr may be NULL to skip that side of the scaling. The scatter of
  rr into the ghosted work vector lvec is started before any local work so the
  communication overlaps with scaling the diagonal block; the off-diagonal
  block is right-scaled only after the scatter has completed.
*/
PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          a = baij->A, b = baij->B;
  PetscInt     s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    /* left scaling of the off-diagonal block uses only locally owned rows, so it needs no communication */
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale  the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
  }
  PetscFunctionReturn(0);
}
1604 
/*
  MatZeroRows_MPIBAIJ - Zeros the given global rows of the matrix, optionally
  placing `diag` on the diagonal and fixing the right-hand side b so that
  x remains a solution for the zeroed rows (b[row] = diag*x[row]).

  rows[] may reference rows owned by any process; MatZeroRowsMapLocal_Private()
  maps them to the locally owned subset lrows[] of length len.
*/
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
  PetscInt    *lrows;
  PetscInt     r, len;
  PetscBool    cong;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  if ((diag != 0.0) && cong) {
    /* square/congruent layout: the sequential routine can set the diagonal directly */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
  } else if (diag != 0.0) {
    /* non-congruent layouts: diagonal entries may fall outside l->A, so insert
       them one at a time with MatSetValues() (requires new nonzeros be allowed) */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
    PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
       MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  } else {
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}
1662 
/*
  MatZeroRowsColumns_MPIBAIJ - Zeros the given global rows AND the matching
  columns, optionally placing `diag` on the diagonal and updating b so that
  the eliminated unknowns keep the values given in x.

  The requested (possibly off-process) rows are routed to their owners with a
  PetscSF; the owned subset is handled by MatZeroRowsColumns() on the diagonal
  block, while the off-diagonal block is fixed up manually: its zeroed rows are
  cleared in place, and a scattered 0/1 mask over the ghost columns identifies
  which off-diagonal columns must be eliminated (with the corresponding b
  correction when x is provided).
*/
PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n, p = 0;
  PetscInt           i, j, k, r, len = 0, row, col, count;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask;
  Vec                xmask, lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  /* NOTE(review): MPI_LOR over the (nonnegative) global row indices turns any
     targeted root entry nonzero while untouched entries stay -1 */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off diagonal part of matrix */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  /* mark the zeroed rows, then scatter so every process sees which of its
     ghost columns are being eliminated */
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x) {
    /* bring the ghost values of x over so b can be corrected for eliminated columns */
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off diagonal matrix */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
    /* point at the (row%bs)-th point row inside the block row; stepping by bs
       walks along that point row through the column-wise stored blocks */
    aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa += bs;
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i / bs;
    for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs * baij->j[j] + k;
        if (PetscAbsScalar(mask[col])) {
          aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k;
          if (x) bb[i] -= aa[0] * xx[col];
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}
1758 
1759 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1760 {
1761   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1762 
1763   PetscFunctionBegin;
1764   PetscCall(MatSetUnfactored(a->A));
1765   PetscFunctionReturn(0);
1766 }
1767 
1768 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);
1769 
1770 PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag)
1771 {
1772   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
1773   Mat          a, b, c, d;
1774   PetscBool    flg;
1775 
1776   PetscFunctionBegin;
1777   a = matA->A;
1778   b = matA->B;
1779   c = matB->A;
1780   d = matB->B;
1781 
1782   PetscCall(MatEqual(a, c, &flg));
1783   if (flg) PetscCall(MatEqual(b, d, &flg));
1784   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1785   PetscFunctionReturn(0);
1786 }
1787 
1788 PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str)
1789 {
1790   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1791   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
1792 
1793   PetscFunctionBegin;
1794   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1795   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1796     PetscCall(MatCopy_Basic(A, B, str));
1797   } else {
1798     PetscCall(MatCopy(a->A, b->A, str));
1799     PetscCall(MatCopy(a->B, b->B, str));
1800   }
1801   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1802   PetscFunctionReturn(0);
1803 }
1804 
/*
  MatSetUp_MPIBAIJ - Sets up the matrix when the user has not preallocated,
  using default per-row nonzero estimates and the matrix's current block size.
*/
PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatMPIBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL));
  PetscFunctionReturn(0);
}
1811 
/*
  MatAXPYGetPreallocation_MPIBAIJ - Computes per-block-row nonzero counts nnz[]
  for the union of the patterns of X and Y (used to preallocate Y+a*X).

  yltog/xltog map the compressed off-diagonal column indices of Y and X to
  global block columns so the two patterns can be merged consistently.
  Y and X are the sequential off-diagonal blocks here — TODO confirm that
  Y->rmap->N (used for the block-row count m) equals the local row count.
*/
PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
{
  PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs;
  Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
  Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;

  PetscFunctionBegin;
  PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
  PetscFunctionReturn(0);
}
1822 
/*
  MatAXPY_MPIBAIJ - Computes Y = a*X + Y.

  SAME_NONZERO_PATTERN: the stored values of both blocks are updated directly
  with a single BLAS axpy each (x->nz block entries of bs2 scalars).
  SUBSET_NONZERO_PATTERN: falls back to the generic MatAXPY_Basic().
  DIFFERENT_NONZERO_PATTERN: builds a new matrix B preallocated for the merged
  pattern, accumulates a*X+Y into it, and replaces Y in place via
  MatHeaderMerge().
*/
PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
  PetscBLASInt bnz, one                         = 1;
  Mat_SeqBAIJ *x, *y;
  PetscInt     bs2 = Y->rmap->bs * Y->rmap->bs;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x                 = (Mat_SeqBAIJ *)xx->A->data;
    y                 = (Mat_SeqBAIJ *)yy->A->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    x = (Mat_SeqBAIJ *)xx->B->data;
    y = (Mat_SeqBAIJ *)yy->B->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
    /* nnz_d/nnz_o: per-block-row counts for the merged diagonal/off-diagonal patterns */
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
    PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
    PetscCall(MatSetType(B, MATMPIBAIJ));
    PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
    /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(0);
}
1865 
1866 PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
1867 {
1868   PetscFunctionBegin;
1869   if (PetscDefined(USE_COMPLEX)) {
1870     Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;
1871 
1872     PetscCall(MatConjugate_SeqBAIJ(a->A));
1873     PetscCall(MatConjugate_SeqBAIJ(a->B));
1874   }
1875   PetscFunctionReturn(0);
1876 }
1877 
1878 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1879 {
1880   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1881 
1882   PetscFunctionBegin;
1883   PetscCall(MatRealPart(a->A));
1884   PetscCall(MatRealPart(a->B));
1885   PetscFunctionReturn(0);
1886 }
1887 
1888 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1889 {
1890   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1891 
1892   PetscFunctionBegin;
1893   PetscCall(MatImaginaryPart(a->A));
1894   PetscCall(MatImaginaryPart(a->B));
1895   PetscFunctionReturn(0);
1896 }
1897 
/*
  MatCreateSubMatrix_MPIBAIJ - Extracts the parallel submatrix mat[isrow,iscol].

  The column IS is gathered onto every process (ISAllGather) and the result is
  delegated to MatCreateSubMatrix_MPIBAIJ_Private(). On MAT_INITIAL_MATRIX the
  gathered IS is composed on the new matrix under the key "ISAllGather" so a
  later MAT_REUSE_MATRIX call can retrieve it without regathering.
*/
PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS       iscol_local;
  PetscInt csize;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(iscol, &csize));
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    PetscCall(ISAllGather(iscol, &iscol_local));
  }
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat));
  if (call == MAT_INITIAL_MATRIX) {
    /* composing keeps a reference, so the local destroy leaves it alive on *newmat */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(0);
}
1918 
1919 /*
1920   Not great since it makes two copies of the submatrix, first an SeqBAIJ
1921   in local and then by concatenating the local matrices the end result.
1922   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1923   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
1924 */
/*
  MatCreateSubMatrix_MPIBAIJ_Private - Builds the parallel submatrix
  mat[isrow,iscol] where iscol has already been gathered onto every process.

  A sequential copy of the selected rows/columns is obtained via
  MatCreateSubMatrices_MPIBAIJ_local() (kept on the result under the key
  "SubMatrix" for reuse), then its rows are redistributed into a new parallel
  matrix whose local column count is csize (or computed when PETSC_DECIDE).
*/
PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt  rank, size;
  PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
  PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat          M, Mreuse;
  MatScalar   *vwork, *aa;
  MPI_Comm     comm;
  IS           isrow_new, iscol_new;
  Mat_SeqBAIJ *aij;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* The compression and expansion should be avoided. Doesn't point
     out errors, might change the indices, hence buggy */
  /* convert point-index sets to block-index sets */
  PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
  PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));

  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse));
  }
  PetscCall(ISDestroy(&isrow_new));
  PetscCall(ISDestroy(&iscol_new));
  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetSize(Mreuse, &m, &n));
  m = m / bs;
  n = n / bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n * bs) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize / bs;
    }
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    /* both preallocations are called; only the one matching M's type takes effect
       (this routine serves BAIJ and SBAIJ) */
    PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(PetscFree2(dlens, olens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqBAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  /* insert the sequential copy block row by block row */
  for (i = 0; i < m; i++) {
    row   = rstart / bs + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj += nz;
    vwork = aa;
    aa += nz * bs * bs;
    PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(PetscObjectDereference((PetscObject)Mreuse));
  }
  PetscFunctionReturn(0);
}
2050 
/*
  MatPermute_MPIBAIJ - Forms B = P_row * A * P_col for permutation index sets.

  rowp must be made collective on A's communicator (copied if it lives on a
  different communicator), and colp must be made fully local (gathered if it is
  distributed); the permuted matrix is then obtained as a submatrix extraction.
*/
PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  MPI_Comm        comm, pcomm;
  PetscInt        clocal_size, nrows;
  const PetscInt *rows;
  PetscMPIInt     size;
  IS              crowp, lcolp;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  /* make a collective version of 'rowp' */
  PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
  if (pcomm == comm) {
    crowp = rowp;
  } else {
    PetscCall(ISGetSize(rowp, &nrows));
    PetscCall(ISGetIndices(rowp, &rows));
    PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
    PetscCall(ISRestoreIndices(rowp, &rows));
  }
  PetscCall(ISSetPermutation(crowp));
  /* make a local version of 'colp' */
  PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
  PetscCallMPI(MPI_Comm_size(pcomm, &size));
  if (size == 1) {
    lcolp = colp;
  } else {
    PetscCall(ISAllGather(colp, &lcolp));
  }
  PetscCall(ISSetPermutation(lcolp));
  /* now we just get the submatrix */
  PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B));
  /* clean up */
  /* only destroy the index sets this routine created, never the caller's */
  if (pcomm != comm) PetscCall(ISDestroy(&crowp));
  if (size > 1) PetscCall(ISDestroy(&lcolp));
  PetscFunctionReturn(0);
}
2089 
2090 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
2091 {
2092   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
2093   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
2094 
2095   PetscFunctionBegin;
2096   if (nghosts) *nghosts = B->nbs;
2097   if (ghosts) *ghosts = baij->garray;
2098   PetscFunctionReturn(0);
2099 }
2100 
/*
  MatGetSeqNonzeroStructure_MPIBAIJ - Builds, on every process, a sequential
  AIJ matrix holding the complete block-nonzero structure of the parallel
  matrix (one row/column per block; numerical values are not set).

  Two MPI_Allgatherv calls are used: first to share the per-block-row nonzero
  counts, then to share the column indices, which each process writes directly
  into its own slice of the result's j array in global (sorted) order.
*/
PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat)
{
  Mat          B;
  Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
  Mat_SeqAIJ  *b;
  PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
  PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
  PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));

  /* ----------------------------------------------------------------
     Tell every processor the number of nonzeros per row
  */
  /* lens[i] = nonzero blocks in global block row i (diagonal + off-diagonal parts) */
  PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
  for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
  PetscCall(PetscMalloc1(2 * size, &recvcounts));
  displs = recvcounts + size;
  for (i = 0; i < size; i++) {
    recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs;
    displs[i]     = A->rmap->range[i] / bs;
  }
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /* ---------------------------------------------------------------
     Create the sequential matrix of the same type as the local block diagonal
  */
  PetscCall(MatCreate(PETSC_COMM_SELF, &B));
  PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(B, MATSEQAIJ));
  PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
  b = (Mat_SeqAIJ *)B->data;

  /*--------------------------------------------------------------------
    Copy my part of matrix column indices over
  */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank] / bs];
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend / bs - A->rmap->rstart / bs;
  cnt        = 0;
  /* merge off-diagonal (below-diagonal part), diagonal, then off-diagonal
     (above-diagonal part) so each row's global column indices come out sorted */
  for (i = 0; i < n; i++) {
    /* put in lower diagonal portion */
    m = bd->i[i + 1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;

    /* put in upper diagonal portion */
    while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
  }
  PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);

  /*--------------------------------------------------------------------
    Gather all column indices to all processors
  */
  for (i = 0; i < size; i++) {
    recvcounts[i] = 0;
    for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
  }
  displs[0] = 0;
  for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /*--------------------------------------------------------------------
    Assemble the matrix into useable form (note numerical values not yet set)
  */
  /* set the b->ilen (length of each row) values */
  PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
  /* set the b->i indices */
  b->i[0] = 0;
  for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
  PetscCall(PetscFree(lens));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscFree(recvcounts));

  PetscCall(MatPropagateSymmetryOptions(A, B));
  *newmat = B;
  PetscFunctionReturn(0);
}
2190 
/*
  MatSOR_MPIBAIJ - Parallel (processor-local) SOR relaxation.

  Only the *local* sweep variants are supported: each outer iteration scatters
  the current solution into the ghosted vector lvec, forms the corrected
  right-hand side bb1 = bb - B*x (B is the off-diagonal block), and then runs
  the sequential SOR of the diagonal block on bb1. True parallel (global
  ordering) sweeps are not implemented and raise PETSC_ERR_SUP.
*/
PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  Vec          bb1 = NULL;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(0);
  }

  /* (~flag) & SOR_ZERO_INITIAL_GUESS: a work vector is needed unless this is a
     single iteration starting from a zero guess (which needs no rhs update) */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first iteration: x is zero so B*x is zero; sweep on bb directly */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported");

  PetscCall(VecDestroy(&bb1));
  PetscFunctionReturn(0);
}
2258 
2259 PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions)
2260 {
2261   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data;
2262   PetscInt     m, N, i, *garray = aij->garray;
2263   PetscInt     ib, jb, bs = A->rmap->bs;
2264   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data;
2265   MatScalar   *a_val = a_aij->a;
2266   Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data;
2267   MatScalar   *b_val = b_aij->a;
2268   PetscReal   *work;
2269 
2270   PetscFunctionBegin;
2271   PetscCall(MatGetSize(A, &m, &N));
2272   PetscCall(PetscCalloc1(N, &work));
2273   if (type == NORM_2) {
2274     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2275       for (jb = 0; jb < bs; jb++) {
2276         for (ib = 0; ib < bs; ib++) {
2277           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2278           a_val++;
2279         }
2280       }
2281     }
2282     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2283       for (jb = 0; jb < bs; jb++) {
2284         for (ib = 0; ib < bs; ib++) {
2285           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2286           b_val++;
2287         }
2288       }
2289     }
2290   } else if (type == NORM_1) {
2291     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2292       for (jb = 0; jb < bs; jb++) {
2293         for (ib = 0; ib < bs; ib++) {
2294           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2295           a_val++;
2296         }
2297       }
2298     }
2299     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2300       for (jb = 0; jb < bs; jb++) {
2301         for (ib = 0; ib < bs; ib++) {
2302           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2303           b_val++;
2304         }
2305       }
2306     }
2307   } else if (type == NORM_INFINITY) {
2308     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2309       for (jb = 0; jb < bs; jb++) {
2310         for (ib = 0; ib < bs; ib++) {
2311           int col   = A->cmap->rstart + a_aij->j[i] * bs + jb;
2312           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2313           a_val++;
2314         }
2315       }
2316     }
2317     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2318       for (jb = 0; jb < bs; jb++) {
2319         for (ib = 0; ib < bs; ib++) {
2320           int col   = garray[b_aij->j[i]] * bs + jb;
2321           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2322           b_val++;
2323         }
2324       }
2325     }
2326   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
2327     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2328       for (jb = 0; jb < bs; jb++) {
2329         for (ib = 0; ib < bs; ib++) {
2330           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
2331           a_val++;
2332         }
2333       }
2334     }
2335     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2336       for (jb = 0; jb < bs; jb++) {
2337         for (ib = 0; ib < bs; ib++) {
2338           work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
2339           b_val++;
2340         }
2341       }
2342     }
2343   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2344     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2345       for (jb = 0; jb < bs; jb++) {
2346         for (ib = 0; ib < bs; ib++) {
2347           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
2348           a_val++;
2349         }
2350       }
2351     }
2352     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2353       for (jb = 0; jb < bs; jb++) {
2354         for (ib = 0; ib < bs; ib++) {
2355           work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
2356           b_val++;
2357         }
2358       }
2359     }
2360   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
2361   if (type == NORM_INFINITY) {
2362     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
2363   } else {
2364     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
2365   }
2366   PetscCall(PetscFree(work));
2367   if (type == NORM_2) {
2368     for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2369   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2370     for (i = 0; i < N; i++) reductions[i] /= m;
2371   }
2372   PetscFunctionReturn(0);
2373 }
2374 
2375 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values)
2376 {
2377   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2378 
2379   PetscFunctionBegin;
2380   PetscCall(MatInvertBlockDiagonal(a->A, values));
2381   A->factorerrortype             = a->A->factorerrortype;
2382   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2383   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2384   PetscFunctionReturn(0);
2385 }
2386 
2387 PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a)
2388 {
2389   Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
2390   Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ *)maij->A->data;
2391 
2392   PetscFunctionBegin;
2393   if (!Y->preallocated) {
2394     PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
2395   } else if (!aij->nz) {
2396     PetscInt nonew = aij->nonew;
2397     PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
2398     aij->nonew = nonew;
2399   }
2400   PetscCall(MatShift_Basic(Y, a));
2401   PetscFunctionReturn(0);
2402 }
2403 
2404 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d)
2405 {
2406   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2407 
2408   PetscFunctionBegin;
2409   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2410   PetscCall(MatMissingDiagonal(a->A, missing, d));
2411   if (d) {
2412     PetscInt rstart;
2413     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2414     *d += rstart / A->rmap->bs;
2415   }
2416   PetscFunctionReturn(0);
2417 }
2418 
2419 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a)
2420 {
2421   PetscFunctionBegin;
2422   *a = ((Mat_MPIBAIJ *)A->data)->A;
2423   PetscFunctionReturn(0);
2424 }
2425 
/* -------------------------------------------------------------------*/
/* Method table for MATMPIBAIJ. The numbered comments index the
   function-pointer slots of struct _MatOps; NULL marks operations this
   matrix type does not implement. Slot order must match matimpl.h exactly. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
                                       MatGetRow_MPIBAIJ,
                                       MatRestoreRow_MPIBAIJ,
                                       MatMult_MPIBAIJ,
                                       /* 4*/ MatMultAdd_MPIBAIJ,
                                       MatMultTranspose_MPIBAIJ,
                                       MatMultTransposeAdd_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIBAIJ,
                                       MatTranspose_MPIBAIJ,
                                       /*15*/ MatGetInfo_MPIBAIJ,
                                       MatEqual_MPIBAIJ,
                                       MatGetDiagonal_MPIBAIJ,
                                       MatDiagonalScale_MPIBAIJ,
                                       MatNorm_MPIBAIJ,
                                       /*20*/ MatAssemblyBegin_MPIBAIJ,
                                       MatAssemblyEnd_MPIBAIJ,
                                       MatSetOption_MPIBAIJ,
                                       MatZeroEntries_MPIBAIJ,
                                       /*24*/ MatZeroRows_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIBAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIBAIJ,
                                       MatCreateSubMatrices_MPIBAIJ,
                                       MatIncreaseOverlap_MPIBAIJ,
                                       MatGetValues_MPIBAIJ,
                                       MatCopy_MPIBAIJ,
                                       /*44*/ NULL,
                                       MatScale_MPIBAIJ,
                                       MatShift_MPIBAIJ,
                                       NULL,
                                       MatZeroRowsColumns_MPIBAIJ,
                                       /*49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIBAIJ,
                                       MatPermute_MPIBAIJ,
                                       MatSetValuesBlocked_MPIBAIJ,
                                       /*59*/ MatCreateSubMatrix_MPIBAIJ,
                                       MatDestroy_MPIBAIJ,
                                       MatView_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*74*/ NULL,
                                       MatFDColoringApply_BAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPIBAIJ,
                                       /*84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*99*/ NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIBAIJ,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIBAIJ,
                                       MatImaginaryPart_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMissingDiagonal_MPIBAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ,
                                       NULL,
                                       MatGetGhosts_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIBAIJ,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIBAIJ,
                                       MatInvertBlockDiagonal_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_Default,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       NULL,
                                       /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL};
2578 
2579 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
2580 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
2581 
/*
  MatMPIBAIJSetPreallocationCSR_MPIBAIJ - Implementation of MatMPIBAIJSetPreallocationCSR():
  preallocates B from a local block-CSR structure (ii, jj) and optionally inserts the
  block values V.

  The routine first counts, per local block row, how many block columns fall inside
  the diagonal ownership range [cstart, cend) versus outside it, preallocates the
  diagonal and off-diagonal parts accordingly, and then inserts the values with
  MatSetValuesBlocked_MPIBAIJ().
*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
{
  PetscInt        m, rstart, cstart, cend;
  PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
  const PetscInt *JJ          = NULL;
  PetscScalar    *values      = NULL;
  PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
  PetscBool       nooffprocentries;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
  PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
  /* All sizes below are in blocks, not scalar entries */
  m      = B->rmap->n / bs;
  rstart = B->rmap->rstart / bs;
  cstart = B->cmap->rstart / bs;
  cend   = B->cmap->rend / bs;

  PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
  PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
  for (i = 0; i < m; i++) {
    nz = ii[i + 1] - ii[i];
    PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
    nz_max = PetscMax(nz_max, nz);
    dlen   = 0;
    olen   = 0;
    JJ     = jj + ii[i];
    /* Split this block row's entries into diagonal-part and off-diagonal-part counts */
    for (j = 0; j < nz; j++) {
      if (*JJ < cstart || *JJ >= cend) olen++;
      else dlen++;
      JJ++;
    }
    d_nnz[i] = dlen;
    o_nnz[i] = olen;
  }
  PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  values = (PetscScalar *)V;
  /* With no values supplied, insert zeros so the nonzero structure is still established */
  if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
  for (i = 0; i < m; i++) {
    PetscInt        row   = i + rstart;
    PetscInt        ncols = ii[i + 1] - ii[i];
    const PetscInt *icols = jj + ii[i];
    if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
      PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
    } else { /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j = 0; j < ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
        PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
      }
    }
  }

  if (!V) PetscCall(PetscFree(values));
  /* All inserted rows are local by construction, so skip the off-process stash exchange */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}
2650 
2651 /*@C
2652    MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values
2653 
2654    Collective
2655 
2656    Input Parameters:
2657 +  B - the matrix
2658 .  bs - the block size
2659 .  i - the indices into j for the start of each local row (starts with zero)
2660 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2661 -  v - optional values in the matrix
2662 
2663    Level: advanced
2664 
2665    Notes:
2666     The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
2667    may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
2668    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2669    `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2670    block column and the second index is over columns within a block.
2671 
2672    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2673 
2674 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MPIBAIJ`
2675 @*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  /* Dispatch to the type-specific implementation if one is composed; no-op otherwise */
  PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
  PetscFunctionReturn(0);
}
2685 
/*
  MatMPIBAIJSetPreallocation_MPIBAIJ - Implementation of MatMPIBAIJSetPreallocation():
  fixes the block size, validates per-block-row counts, caches block-scaled layout
  data, (re)creates the sequential diagonal (b->A) and off-diagonal (b->B) parts,
  and preallocates them.
*/
PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
{
  Mat_MPIBAIJ *b;
  PetscInt     i;
  PetscMPIInt  size;

  PetscFunctionBegin;
  PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));

  /* Per-block-row counts, when given, must be nonnegative */
  if (d_nnz) {
    for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
  }
  if (o_nnz) {
    for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
  }

  /* Cache sizes and ownership ranges in units of blocks */
  b      = (Mat_MPIBAIJ *)B->data;
  b->bs2 = bs * bs;
  b->mbs = B->rmap->n / bs;
  b->nbs = B->cmap->n / bs;
  b->Mbs = B->rmap->N / bs;
  b->Nbs = B->cmap->N / bs;

  for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
  b->rstartbs = B->rmap->rstart / bs;
  b->rendbs   = B->rmap->rend / bs;
  b->cstartbs = B->cmap->rstart / bs;
  b->cendbs   = B->cmap->rend / bs;

  /* Discard any previously built column map and communication structures;
     they are rebuilt during assembly */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  /* On one rank there are no off-process columns, so the off-diagonal part is empty */
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetType(b->B, MATSEQBAIJ));

  if (!B->preallocated) {
    PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
    PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
    PetscCall(MatSetType(b->A, MATSEQBAIJ));
    /* Block stash buffers off-process blocked entries set before assembly */
    PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
  }

  PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
  PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2748 
2749 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec);
2750 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal);
2751 
/*
  MatConvert_MPIBAIJ_MPIAdj - Builds a MATMPIADJ adjacency structure from the block
  nonzero pattern of an MPIBAIJ matrix: one vertex per block row, diagonal blocks
  removed, columns expressed in global block numbering.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
  Mat_SeqBAIJ    *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data;
  PetscInt        M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  /* Row-pointer pass: each block row gets its diagonal-part plus off-diagonal-part count */
  PetscCall(PetscMalloc1(M + 1, &ii));
  ii[0] = 0;
  for (i = 0; i < M; i++) {
    PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]);
    PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]);
    ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i];
    /* remove one from count of matrix has diagonal */
    for (j = id[i]; j < id[i + 1]; j++) {
      if (jd[j] == i) {
        ii[i + 1]--;
        break;
      }
    }
  }
  /* Column pass, in ascending global order: off-diagonal entries left of the
     ownership range, then local (diagonal-part) entries minus the diagonal,
     then the remaining off-diagonal entries */
  PetscCall(PetscMalloc1(ii[M], &jj));
  cnt = 0;
  for (i = 0; i < M; i++) {
    for (j = io[i]; j < io[i + 1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
    }
    for (k = id[i]; k < id[i + 1]; k++) {
      if (jd[k] != i) jj[cnt++] = rstart + jd[k]; /* skip the diagonal entry */
    }
    for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]];
  }
  /* MatCreateMPIAdj takes ownership of ii and jj; do not free them here */
  PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj));
  PetscFunctionReturn(0);
}
2789 
2790 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2791 
2792 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
2793 
/*
  MatConvert_MPIBAIJ_MPIAIJ - Converts an MPIBAIJ matrix to MPIAIJ by converting its
  sequential diagonal and off-diagonal parts with MatConvert_SeqBAIJ_SeqAIJ(). For an
  initial conversion A is temporarily disassembled so its off-diagonal part uses
  global column numbering, then reassembled afterwards.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  Mat_MPIAIJ  *b;
  Mat          B;

  PetscFunctionBegin;
  PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    /* Fresh target with matching layout; empty preallocation since the converted
       sequential parts are installed directly below */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetType(B, MATMPIAIJ));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
    PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
  }
  b = (Mat_MPIAIJ *)B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
  } else {
    /* Reassembly below resets the symmetry-related flags, so save and restore them */
    PetscBool3 sym = A->symmetric, hermitian = A->hermitian, structurally_symmetric = A->structurally_symmetric, spd = A->spd;
    PetscCall(MatDestroy(&b->A));
    PetscCall(MatDestroy(&b->B));
    /* Disassemble so a->B's columns are in global numbering before conversion */
    PetscCall(MatDisAssemble_MPIBAIJ(A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    A->symmetric              = sym;
    A->hermitian              = hermitian;
    A->structurally_symmetric = structurally_symmetric;
    A->spd                    = spd;
  }
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    /* Replace A's contents with B's, destroying the temporary header */
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(0);
}
2842 
2843 /*MC
2844    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2845 
2846    Options Database Keys:
2847 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
2848 . -mat_block_size <bs> - set the blocksize used to store the matrix
2849 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use  (0 often indicates using BLAS)
2850 - -mat_use_hash_table <fact> - set hash table factor
2851 
2852    Level: beginner
2853 
2854    Note:
    `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
2857 
.seealso: `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ()`
2859 M*/
2860 
2861 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);
2862 
/*
  MatCreate_MPIBAIJ - Constructor registered for MATMPIBAIJ: allocates the
  implementation data, installs the method table, composes the type-specific
  functions, and processes the hash-table assembly option.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ *b;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscNew(&b));
  B->data = (void *)b;

  PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));

  /* build local table of row and column ownerships */
  PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  /* Compose type-specific implementations so generic Mat entry points can find them */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));

  PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
  PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
  if (flg) {
    PetscReal fact = 1.39;
    PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
    PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
    /* Factors <= 1.0 would give a table no larger than the data; fall back to the default */
    if (fact <= 1.0) fact = 1.39;
    PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
    PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
  }
  PetscOptionsEnd();
  PetscFunctionReturn(0);
}
2942 
2943 /*MC
2944    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2945 
2946    This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
2947    and `MATMPIBAIJ` otherwise.
2948 
2949    Options Database Keys:
2950 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`
2951 
2952   Level: beginner
2953 
2954 .seealso: `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
2955 M*/
2956 
2957 /*@C
2958    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
2959    (block compressed row).  For good matrix assembly performance
2960    the user should preallocate the matrix storage by setting the parameters
2961    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2962    performance can be increased by more than a factor of 50.
2963 
2964    Collective
2965 
2966    Input Parameters:
2967 +  B - the matrix
2968 .  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2969           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2970 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2971            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
2976 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2977            submatrix (same for all local rows).
2978 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2979            off-diagonal portion of the local submatrix (possibly different for
2980            each block row) or NULL.
2981 
2982    If the *_nnz parameter is given then the *_nz parameter is ignored
2983 
2984    Options Database Keys:
2985 +   -mat_block_size - size of the blocks to use
2986 -   -mat_use_hash_table <fact> - set hash table factor
2987 
2988    Notes:
2989    If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
2991 
2992    Storage Information:
2993    For a square global matrix we define each processor's diagonal portion
2994    to be its local rows and the corresponding columns (a square submatrix);
2995    each processor's off-diagonal portion encompasses the remainder of the
2996    local matrix (a rectangular submatrix).
2997 
2998    The user can specify preallocated storage for the diagonal part of
2999    the local submatrix with either d_nz or d_nnz (not both).  Set
3000    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3001    memory allocation.  Likewise, specify preallocated storage for the
3002    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3003 
3004    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3005    the figure below we depict these three local rows and all columns (0-11).
3006 
3007 .vb
3008            0 1 2 3 4 5 6 7 8 9 10 11
3009           --------------------------
3010    row 3  |o o o d d d o o o o  o  o
3011    row 4  |o o o d d d o o o o  o  o
3012    row 5  |o o o d d d o o o o  o  o
3013           --------------------------
3014 .ve
3015 
3016    Thus, any entries in the d locations are stored in the d (diagonal)
3017    submatrix, and any entries in the o locations are stored in the
3018    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3019    stored simply in the `MATSEQBAIJ` format for compressed row storage.
3020 
3021    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3022    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3023    In general, for PDE problems in which most nonzeros are near the diagonal,
3024    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3025    or you will get TERRIBLE performance; see the users' manual chapter on
3026    matrices.
3027 
3028    You can call `MatGetInfo()` to get information on how effective the preallocation was;
3029    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3030    You can also run with the option -info and look for messages with the string
3031    malloc in them to see if additional memory allocation was needed.
3032 
3033    Level: intermediate
3034 
3035 .seealso: `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
3036 @*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2); /* bs must be the same on all ranks of B's communicator */
  /* Dispatch to the type-specific implementation (MatMPIBAIJSetPreallocation_MPIBAIJ for MATMPIBAIJ);
     PetscTryMethod makes this a no-op for types that did not compose "MatMPIBAIJSetPreallocation_C" */
  PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(0);
}
3046 
3047 /*@C
3048    MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
3049    (block compressed row).  For good matrix assembly performance
3050    the user should preallocate the matrix storage by setting the parameters
3051    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3052    performance can be increased by more than a factor of 50.
3053 
3054    Collective
3055 
3056    Input Parameters:
3057 +  comm - MPI communicator
3058 .  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3059           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3060 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
3061            This value should be the same as the local size used in creating the
3062            y vector for the matrix-vector product y = Ax.
3063 .  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
3064            This value should be the same as the local size used in creating the
3065            x vector for the matrix-vector product y = Ax.
3066 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3067 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3068 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3069            submatrix  (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
3074 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3075            submatrix (same for all local rows).
3076 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3077            off-diagonal portion of the local submatrix (possibly different for
3078            each block row) or NULL.
3079 
3080    Output Parameter:
3081 .  A - the matrix
3082 
3083    Options Database Keys:
3084 +   -mat_block_size - size of the blocks to use
3085 -   -mat_use_hash_table <fact> - set hash table factor
3086 
3087    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3088    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3089    [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
3090 
3091    Notes:
3092    If the *_nnz parameter is given then the *_nz parameter is ignored
3093 
   A nonzero block is any block that has 1 or more nonzeros in it
3095 
3096    The user MUST specify either the local or global matrix dimensions
3097    (possibly both).
3098 
3099    If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3101 
3102    Storage Information:
3103    For a square global matrix we define each processor's diagonal portion
3104    to be its local rows and the corresponding columns (a square submatrix);
3105    each processor's off-diagonal portion encompasses the remainder of the
3106    local matrix (a rectangular submatrix).
3107 
3108    The user can specify preallocated storage for the diagonal part of
3109    the local submatrix with either d_nz or d_nnz (not both).  Set
3110    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3111    memory allocation.  Likewise, specify preallocated storage for the
3112    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3113 
3114    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3115    the figure below we depict these three local rows and all columns (0-11).
3116 
3117 .vb
3118            0 1 2 3 4 5 6 7 8 9 10 11
3119           --------------------------
3120    row 3  |o o o d d d o o o o  o  o
3121    row 4  |o o o d d d o o o o  o  o
3122    row 5  |o o o d d d o o o o  o  o
3123           --------------------------
3124 .ve
3125 
3126    Thus, any entries in the d locations are stored in the d (diagonal)
3127    submatrix, and any entries in the o locations are stored in the
3128    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3129    stored simply in the `MATSEQBAIJ` format for compressed row storage.
3130 
3131    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3132    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3133    In general, for PDE problems in which most nonzeros are near the diagonal,
3134    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3135    or you will get TERRIBLE performance; see the users' manual chapter on
3136    matrices.
3137 
3138    Level: intermediate
3139 
3140 .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
3141 @*/
3142 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
3143 {
3144   PetscMPIInt size;
3145 
3146   PetscFunctionBegin;
3147   PetscCall(MatCreate(comm, A));
3148   PetscCall(MatSetSizes(*A, m, n, M, N));
3149   PetscCallMPI(MPI_Comm_size(comm, &size));
3150   if (size > 1) {
3151     PetscCall(MatSetType(*A, MATMPIBAIJ));
3152     PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
3153   } else {
3154     PetscCall(MatSetType(*A, MATSEQBAIJ));
3155     PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
3156   }
3157   PetscFunctionReturn(0);
3158 }
3159 
/* Duplicate an MPIBAIJ matrix: create a new matrix of the same type and sizes, copy the
   scalar metadata and parallel layout, then duplicate the diagonal (A) and off-diagonal (B)
   sequential blocks according to cpvalues. The hash-table state is deliberately reset, not copied. */
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat          mat;
  Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
  PetscInt     len = 0;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));

  /* the duplicate is born fully assembled; no further preallocation is required */
  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy block-size bookkeeping (counts in units of bs x bs blocks) */
  a             = (Mat_MPIBAIJ *)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  /* share (reference-count) the layouts rather than rebuilding them */
  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL; /* per-matrix scratch for MatGetRow(); rebuilt lazily */
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;
  a->barray       = NULL;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: keep the configuration (flag/factor) but reset the table itself and its counters */
  a->ht           = NULL;
  a->hd           = NULL;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
  /* colmap maps global block columns to local columns of B; copy it if the original built one */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableCreateCopy(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
#endif
  } else a->colmap = NULL;

  /* garray lists the global block columns present in B; len == 0 means no off-process columns */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
    PetscCall(PetscMalloc1(len, &a->garray));
    PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
  PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
  PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));

  /* duplicate the two sequential pieces, honoring cpvalues (copy values or pattern only) */
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}
3235 
3236 /* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
  PetscInt    *rowidxs, *colidxs, rs, cs, ce;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header: [classid, global rows, global cols, total nonzeros] */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  /* a negative nz marks a special on-disk format this loader cannot handle */
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set local sizes if not set already; for a square matrix, take either local size from the other */
  if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
  if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetLocalSize(mat, &m, &n));
  PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
  mbs = m / bs; /* number of local block rows */
  nbs = n / bs; /* number of local block columns */

  /* read in row lengths and build row indices (prefix-sum into a local CSR row-pointer array) */
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  /* sanity check: the sum of all local row lengths must equal the header's nonzero count */
  PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));

  {                /* preallocate matrix storage */
    PetscBT    bt; /* helper bit set to count diagonal nonzeros */
    PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
    PetscBool  sbaij, done;
    PetscInt  *d_nnz, *o_nnz;

    PetscCall(PetscBTCreate(nbs, &bt));
    PetscCall(PetscHSetICreate(&ht));
    PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
    PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
    /* for each block row, count the distinct block columns touched by its bs scalar rows:
       the bit set tracks diagonal-portion block columns, the hash set off-diagonal ones */
    for (i = 0; i < mbs; i++) {
      PetscCall(PetscBTMemzero(nbs, bt));
      PetscCall(PetscHSetIClear(ht));
      for (k = 0; k < bs; k++) {
        PetscInt row = bs * i + k;
        for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
          PetscInt col = colidxs[j];
          if (!sbaij || col >= row) { /* for SBAIJ only the upper triangle is stored */
            if (col >= cs && col < ce) {
              if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++; /* first hit of this diagonal block column */
            } else {
              PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
              if (done) o_nnz[i]++; /* done == newly inserted, i.e. first hit */
            }
          }
        }
      }
    }
    PetscCall(PetscBTDestroy(&bt));
    PetscCall(PetscHSetIDestroy(&ht));
    /* both calls are type-dispatched; presumably only the one matching mat's actual type
       (MATMPIBAIJ vs MATMPISBAIJ) takes effect and the other is a no-op -- see PetscTryMethod */
    PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree2(d_nnz, o_nnz));
  }

  /* store matrix values, one local scalar row at a time via the type's setvalues */
  for (i = 0; i < m; i++) {
    PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
    PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES));
  }

  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
3337 
3338 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
3339 {
3340   PetscBool isbinary;
3341 
3342   PetscFunctionBegin;
3343   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3344   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3345   PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
3346   PetscFunctionReturn(0);
3347 }
3348 
3349 /*@
3350    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table
3351 
3352    Input Parameters:
3353 +  mat  - the matrix
3354 -  fact - factor
3355 
3356    Options Database Key:
3357 .  -mat_use_hash_table <fact> - provide the factor
3358 
3359    Level: advanced
3360 
3361 .seealso: `MATMPIBAIJ`, `MatSetOption()`
3362 @*/
PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact)
{
  PetscFunctionBegin;
  /* Forward to the type-specific implementation; a no-op for matrix types that
     did not compose "MatSetHashTableFactor_C" */
  PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
  PetscFunctionReturn(0);
}
3369 
3370 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact)
3371 {
3372   Mat_MPIBAIJ *baij;
3373 
3374   PetscFunctionBegin;
3375   baij          = (Mat_MPIBAIJ *)mat->data;
3376   baij->ht_fact = fact;
3377   PetscFunctionReturn(0);
3378 }
3379 
3380 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
3381 {
3382   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3383   PetscBool    flg;
3384 
3385   PetscFunctionBegin;
3386   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
3387   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
3388   if (Ad) *Ad = a->A;
3389   if (Ao) *Ao = a->B;
3390   if (colmap) *colmap = a->garray;
3391   PetscFunctionReturn(0);
3392 }
3393 
3394 /*
3395     Special version for direct calls from Fortran (to eliminate two function call overheads
3396 */
3397 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3398   #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3399 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3400   #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3401 #endif
3402 
3403 /*@C
3404   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`
3405 
3406   Collective
3407 
3408   Input Parameters:
3409 + mat - the matrix
3410 . min - number of input rows
3411 . im - input rows
3412 . nin - number of input columns
3413 . in - input columns
3414 . v - numerical values input
3415 - addvin - `INSERT_VALUES` or `ADD_VALUES`
3416 
3417   Developer Note:
3418     This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.
3419 
3420   Level: advanced
3421 
3422 .seealso: `MatSetValuesBlocked()`
3423 @*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
{
  /* convert input arguments to C version (Fortran passes everything by reference) */
  Mat        mat = *matin;
  PetscInt   m = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray;
  PetscBool        roworiented = baij->roworiented;
  PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));

  /* lazily allocate the bs x bs scratch block used when values must be repacked */
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* stepval = scalar distance between consecutive rows of one block within the input array v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative block-row indices are ignored by convention */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) { /* block row is owned locally */
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required: the block is already contiguous in v */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required: gather the (i,j) block into the scratch buffer */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2; /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend) { /* diagonal portion */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) { /* negative block-column indices are ignored */
          continue;
        } else { /* off-diagonal portion */
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            /* after an assembly B stores compacted columns; translate the global block
               column through colmap (built lazily) */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
  #endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &col));
            col = (col - 1) / bs; /* colmap stores 1-based scalar columns; convert to 0-based block column */
          #else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            /* col < 0 means this block column is not yet present in B; if new nonzeros are
               allowed, disassemble so the insertion can use global indexing */
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            }
          } else col = in[j]; /* before first assembly B uses global block-column indices */
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else { /* block row owned by another process: stash for communication at assembly time */
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
  PetscFunctionReturn(0);
}
3531 
3532 /*@
3533      MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block
3534          CSR format the local rows.
3535 
3536    Collective
3537 
3538    Input Parameters:
3539 +  comm - MPI communicator
3540 .  bs - the block size, only a block size of 1 is supported
3541 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
3542 .  n - This value should be the same as the local size used in creating the
3543        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
3544        calculated if N is given) For square matrices n is almost always m.
3545 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3546 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3547 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
3548 .   j - column indices
3549 -   a - matrix values
3550 
3551    Output Parameter:
3552 .   mat - the matrix
3553 
3554    Level: intermediate
3555 
3556    Notes:
3557        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3558      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3559      called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3560 
3561      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3562      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3563      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3564      with column-major ordering within blocks.
3565 
3566        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3567 
3568 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
3569           `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
3570 @*/
PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIBAIJ));
  PetscCall(MatSetBlockSize(*mat, bs));
  PetscCall(MatSetUp(*mat));
  /* input blocks are column-major (see the Notes above), so switch to column-oriented
     insertion for the CSR copy, then restore the default row orientation */
  PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
  PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
  PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
  PetscFunctionReturn(0);
}
3586 
3587 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3588 {
3589   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
3590   PetscInt    *indx;
3591   PetscScalar *values;
3592 
3593   PetscFunctionBegin;
3594   PetscCall(MatGetSize(inmat, &m, &N));
3595   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3596     Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data;
3597     PetscInt    *dnz, *onz, mbs, Nbs, nbs;
3598     PetscInt    *bindx, rmax = a->rmax, j;
3599     PetscMPIInt  rank, size;
3600 
3601     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3602     mbs = m / bs;
3603     Nbs = N / cbs;
3604     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
3605     nbs = n / cbs;
3606 
3607     PetscCall(PetscMalloc1(rmax, &bindx));
3608     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
3609 
3610     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3611     PetscCallMPI(MPI_Comm_rank(comm, &size));
3612     if (rank == size - 1) {
3613       /* Check sum(nbs) = Nbs */
3614       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
3615     }
3616 
3617     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
3618     for (i = 0; i < mbs; i++) {
3619       PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
3620       nnz = nnz / bs;
3621       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
3622       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
3623       PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL));
3624     }
3625     PetscCall(PetscFree(bindx));
3626 
3627     PetscCall(MatCreate(comm, outmat));
3628     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
3629     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
3630     PetscCall(MatSetType(*outmat, MATBAIJ));
3631     PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz));
3632     PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
3633     MatPreallocateEnd(dnz, onz);
3634     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3635   }
3636 
3637   /* numeric phase */
3638   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3639   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
3640 
3641   for (i = 0; i < m; i++) {
3642     PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3643     Ii = i + rstart;
3644     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
3645     PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3646   }
3647   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
3648   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
3649   PetscFunctionReturn(0);
3650 }
3651