xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 2d30e087755efd99e28fdfe792ffbeb2ee1ea928)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I  "petscmat.h"  I*/
2 
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 #if defined(PETSC_HAVE_HYPRE)
8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
9 #endif
10 
11 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[]) {
12   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
13   PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
14   PetscScalar       *va, *vv;
15   Vec                vB, vA;
16   const PetscScalar *vb;
17 
18   PetscFunctionBegin;
19   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
20   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
21 
22   PetscCall(VecGetArrayWrite(vA, &va));
23   if (idx) {
24     for (i = 0; i < m; i++) {
25       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
26     }
27   }
28 
29   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
30   PetscCall(PetscMalloc1(m, &idxb));
31   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
32 
33   PetscCall(VecGetArrayWrite(v, &vv));
34   PetscCall(VecGetArrayRead(vB, &vb));
35   for (i = 0; i < m; i++) {
36     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
37       vv[i] = vb[i];
38       if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
39     } else {
40       vv[i] = va[i];
41       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
42     }
43   }
44   PetscCall(VecRestoreArrayWrite(vA, &vv));
45   PetscCall(VecRestoreArrayWrite(vA, &va));
46   PetscCall(VecRestoreArrayRead(vB, &vb));
47   PetscCall(PetscFree(idxb));
48   PetscCall(VecDestroy(&vA));
49   PetscCall(VecDestroy(&vB));
50   PetscFunctionReturn(0);
51 }
52 
53 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) {
54   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
55 
56   PetscFunctionBegin;
57   PetscCall(MatStoreValues(aij->A));
58   PetscCall(MatStoreValues(aij->B));
59   PetscFunctionReturn(0);
60 }
61 
62 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) {
63   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
64 
65   PetscFunctionBegin;
66   PetscCall(MatRetrieveValues(aij->A));
67   PetscCall(MatRetrieveValues(aij->B));
68   PetscFunctionReturn(0);
69 }
70 
71 /*
72      Local utility routine that creates a mapping from the global column
73    number to the local number in the off-diagonal part of the local
74    storage of the matrix.  This is done in a non scalable way since the
75    length of colmap equals the global matrix length.
76 */
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) {
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     nbs = B->nbs, i, bs = mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* Sparse map: table keyed by (global block column + 1); value is
     (local scalar column offset + 1). The +1 shifts avoid 0, which the
     table/array uses as the "not present" sentinel. */
  PetscCall(PetscTableCreate(baij->nbs, baij->Nbs + 1, &baij->colmap));
  for (i = 0; i < nbs; i++) PetscCall(PetscTableAdd(baij->colmap, baij->garray[i] + 1, i * bs + 1, INSERT_VALUES));
#else
  /* Dense map: array of length Nbs+1 (global number of block columns), zeroed
     by calloc so an untouched entry reads as 0 == "not present". Non-scalable:
     every rank stores the full global-width map. */
  PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
  PetscCall(PetscLogObjectMemory((PetscObject)mat, baij->Nbs * sizeof(PetscInt)));
  for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
#endif
  PetscFunctionReturn(0);
}
93 
/*
  MatSetValues_SeqBAIJ_A_Private - insert or add one scalar entry (value) at
  scalar position (row,col) of the diagonal block. (orow,ocol) are the global
  indices, used only in error messages. Relies on caller-scope variables:
  a, aa, ai, aj, aimax, ailen, bs, bs2, and the scratch variables brow, bcol,
  ridx, cidx, rp, ap, bap, rmax, nrow, low, high, t, _i, N.
  Uses a short binary search (until the window is <= 3) then a linear scan to
  find the block column; reallocates (unless a->nonew is set) when the entry
  does not yet exist.
*/
#define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    brow = row / bs; \
    rp   = aj + ai[brow]; \
    ap   = aa + bs2 * ai[brow]; \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  }
133 
/*
  MatSetValues_SeqBAIJ_B_Private - same as the _A_ variant but operates on the
  off-diagonal block: uses b, ba, bi, bj, bimax, bilen from the caller's scope
  and jumps to the b_noinsert label. See MatSetValues_SeqBAIJ_A_Private above.
*/
#define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    brow = row / bs; \
    rp   = bj + bi[brow]; \
    ap   = ba + bs2 * bi[brow]; \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column  (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  }
173 
/*
   MatSetValues_MPIBAIJ - insert/add scalar-indexed values into the parallel
   BAIJ matrix. Locally-owned rows go straight into the diagonal (A) or
   off-diagonal (B) sequential block via the macros above; off-process rows
   are stashed for communication at assembly time.
*/
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) {
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  MatScalar    value;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)(A)->data;
  PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar   *aa = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are silently ignored */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* row owned by this rank */
      row = im[i] - rstart_orig;
      for (j = 0; j < n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diagonal block */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue; /* negative columns are silently ignored */
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            /* off-diagonal block uses compressed column numbering: translate
               the global column through colmap (built lazily) */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap, in[j] / bs + 1, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
              /* new off-process column: fall back to global numbering by
                 disassembling B, then refresh cached pointers into B */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col   = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)(B)->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else {
              PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
              col += in[j] % bs; /* colmap stores the block offset; add within-block column */
            }
          } else col = in[j]; /* not yet assembled: B uses global column numbering */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else { /* off-process row: stash for assembly-time communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
258 
/*
   MatSetValuesBlocked_SeqBAIJ_Inlined - insert or add one bs x bs block of
   values v at block position (row,col) of a SeqBAIJ matrix. (orow,ocol) are
   global block indices used only in error messages. v is read row-major when
   a->roworiented, column-major otherwise; storage inside the matrix is
   column-major within each block.
*/
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) {
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];       /* block-column indices of this block row */
  ap    = aa + bs2 * ai[row]; /* block values of this block row */
  rmax  = imax[row];          /* allocated blocks in this row */
  nrow  = ailen[row];         /* used blocks in this row */
  value = v;
  low   = 0;
  high  = nrow;
  /* binary search down to a window of <= 7, then linear scan */
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) { /* block already present: write/accumulate in place */
      bap = ap + bs2 * i;
      if (roworiented) {
        /* v is row-major; transpose into the column-major block */
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        /* v is already column-major; copy column by column */
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2; /* ignore new nonzero blocks silently */
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
334 
335 /*
336     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
337     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
338 */
/*
   MatSetValuesBlocked_MPIBAIJ - insert/add an m x n array of bs x bs blocks,
   addressed by block row indices im[] and block column indices in[]. Local
   block rows go into the sequential A/B blocks; off-process rows are stashed.
   Each block is staged in the bs2-sized scratch buffer baij->barray unless v
   can be aliased directly (single-row/column cases below).
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) {
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ *)mat->data;
  const PetscScalar *value;
  MatScalar         *barray      = baij->barray;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the per-matrix block scratch buffer */
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* stride (in scalars) between consecutive rows of one block inside v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative block rows are ignored */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) { /* locally owned block row */
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2; /* alias v directly; block is contiguous */
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i * (stepval + bs) + j) * bs;
          } else {
            value = v + (j * (stepval + bs) + i) * bs;
          }
          for (ii = 0; ii < bs; ii++, value += bs + stepval) {
            for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2; /* rewind to the start of the staged block */
        }

        if (in[j] >= cstart && in[j] < cend) { /* diagonal block */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue; /* negative block columns are ignored */
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            /* translate global block column to B's compressed numbering */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
#else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &col));
            col = (col - 1) / bs; /* colmap stores scalar offset + 1; convert to block index */
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
              /* new off-process block column: revert B to global numbering */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
          } else col = in[j]; /* not yet assembled: B uses global block numbering */
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else { /* off-process block row: stash for assembly-time communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
429 
/* Multiplicative hashing: HASH_KEY is the fractional part of the golden
   ratio; HASH maps key into [0,size) via the fractional part of key*HASH_KEY.
   tmp is a caller-supplied PetscReal scratch variable (evaluated twice). */
#define HASH_KEY             0.6180339887
#define HASH(size, key, tmp) (tmp = (key)*HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/*
   MatSetValues_MPIBAIJ_HT - scalar-indexed MatSetValues for the hash-table
   fast path: the nonzero structure is fixed (entries must already exist in
   the table built by MatCreateHashTable_MPIBAIJ_Private), so each (row,col)
   is located by open-addressed linear probing with wrap-around instead of a
   search through the block rows. Off-process rows are stashed as usual.
*/
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) {
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
  PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
  PetscReal    tmp;
  MatScalar  **HD       = baij->hd, value;
  PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct; /* probe statistics (debug only) */

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) { /* locally owned row */
      for (j = 0; j < n; j++) {
        col = in[j];
        if (roworiented) value = v[i * n + j];
        else value = v[i + j * m];
        /* Look up into the hash table: key identifies the (block row, block col) pair */
        key = (row / bs) * Nbs + (col / bs) + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          /* debug variant also counts probes for hashing-efficiency info */
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            /* linear probe to end of table, then wrap to the start */
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
              ;
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
                ;
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
            ;
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
              ;
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
        else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
      }
    } else if (!baij->donotstash) { /* off-process row: stash for assembly */
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(0);
}
501 
/*
   MatSetValuesBlocked_MPIBAIJ_HT - block-indexed MatSetValuesBlocked for the
   hash-table fast path: each bs x bs block is located by open-addressed
   linear probing on key = blockrow*Nbs + blockcol + 1, then copied (with
   transposition when roworiented) into the block storage pointed to by HD.
   NOTE(review): locality test uses rend = mat->rmap->rend (a scalar row
   bound) against the block row index, unlike rstart = baij->rstartbs —
   looks inconsistent; confirm against the upstream source.
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) {
  Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col;
  PetscInt           rstart = baij->rstartbs;
  PetscInt           rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
  PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
  PetscReal          tmp;
  MatScalar        **HD = baij->hd, *baij_a;
  const PetscScalar *v_t, *value;
  PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct; /* probe statistics (debug only) */

  PetscFunctionBegin;
  /* stride (in scalars) between consecutive rows of one block inside v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
      PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    }
    row = im[i];
    v_t = v + i * nbs2; /* start of this block row's data within v (row-oriented case) */
    if (row >= rstart && row < rend) {
      for (j = 0; j < n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row * Nbs + col + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          /* debug variant also counts probes for hashing-efficiency info */
          total_ct++;
          insert_ct++;
          if (HT[idx] != key) {
            /* linear probe to end of table, then wrap to the start */
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
              ;
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
                ;
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
            ;
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
              ;
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        baij_a = HD[idx]; /* address of the stored bs x bs block */
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t += bs;
          /* v is row-major; transpose into the column-major block */
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
            }
          }
        } else {
          value = v + j * (stepval + bs) * bs + i * bs;
          /* v is column-major; copy column by column */
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
            }
          }
        }
      }
    } else { /* off-process block row: stash for assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(0);
}
599 
/*
   MatGetValues_MPIBAIJ - retrieve an m x n array of scalar values at global
   indices (idxm[], idxn[]) into v (row-major). Only locally-owned rows are
   supported; columns outside the diagonal block are translated through
   colmap and read from B, yielding 0.0 when the entry is not stored locally.
*/
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) {
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          /* diagonal block: direct local lookup */
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          /* off-diagonal block: translate global column through colmap */
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(baij->colmap, idxn[j] / bs + 1, &data));
          data--;
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          /* entry not stored on this rank (or stale map entry): report zero */
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
636 
/*
   MatNorm_MPIBAIJ - computes the Frobenius, 1- (max column sum), or
   infinity- (max row sum) norm of the parallel matrix, summing local
   contributions from the A and B blocks and reducing across ranks.
*/
PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm) {
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
  PetscReal    sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    /* single rank: defer entirely to the sequential implementation */
    PetscCall(MatNorm(baij->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over all stored scalars of both blocks, then sqrt */
      v  = amat->a;
      nz = amat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v  = bmat->a;
      nz = bmat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      /* accumulate per-global-column |entry| sums in a full-width array (non-scalable) */
      PetscReal *tmp, *tmp2;
      /* NOTE(review): cstart is taken from rstartbs (block ROW start) to offset
         diagonal-block columns — relies on the diagonal block being aligned so
         the block row and column starts coincide; confirm for rectangular layouts */
      PetscInt  *jj, *garray = baij->garray, cstart = baij->rstartbs;
      PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N, &tmp2));
      v  = amat->a;
      jj = amat->j;
      for (i = 0; i < amat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * (cstart + *jj) + j; /* column index */
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      v  = bmat->a;
      jj = bmat->j;
      for (i = 0; i < bmat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * garray[*jj] + j; /* off-diagonal: global column via garray */
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = 0.0;
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
    } else if (type == NORM_INFINITY) { /* max row sum */
      /* sums[] holds the bs scalar-row sums within the current block row */
      PetscReal *sums;
      PetscCall(PetscMalloc1(bs, &sums));
      sum = 0.0;
      for (j = 0; j < amat->mbs; j++) {
        for (row = 0; row < bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2 * amat->i[j];
        nz = amat->i[j + 1] - amat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        v  = bmat->a + bs2 * bmat->i[j];
        nz = bmat->i[j + 1] - bmat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        for (row = 0; row < bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscFree(sums));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
735 
736 /*
737   Creates the hash table, and sets the table
738   This table is created only once.
739   If new entried need to be added to the matrix
740   then the hash table has to be destroyed and
741   recreated.
742 */
743 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor) {
744   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
745   Mat          A = baij->A, B = baij->B;
746   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
747   PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
748   PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
749   PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
750   PetscInt    *HT, key;
751   MatScalar  **HD;
752   PetscReal    tmp;
753 #if defined(PETSC_USE_INFO)
754   PetscInt ct = 0, max = 0;
755 #endif
756 
757   PetscFunctionBegin;
758   if (baij->ht) PetscFunctionReturn(0);
759 
760   baij->ht_size = (PetscInt)(factor * nz);
761   ht_size       = baij->ht_size;
762 
763   /* Allocate Memory for Hash Table */
764   PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
765   HD = baij->hd;
766   HT = baij->ht;
767 
768   /* Loop Over A */
769   for (i = 0; i < a->mbs; i++) {
770     for (j = ai[i]; j < ai[i + 1]; j++) {
771       row = i + rstart;
772       col = aj[j] + cstart;
773 
774       key = row * Nbs + col + 1;
775       h1  = HASH(ht_size, key, tmp);
776       for (k = 0; k < ht_size; k++) {
777         if (!HT[(h1 + k) % ht_size]) {
778           HT[(h1 + k) % ht_size] = key;
779           HD[(h1 + k) % ht_size] = a->a + j * bs2;
780           break;
781 #if defined(PETSC_USE_INFO)
782         } else {
783           ct++;
784 #endif
785         }
786       }
787 #if defined(PETSC_USE_INFO)
788       if (k > max) max = k;
789 #endif
790     }
791   }
792   /* Loop Over B */
793   for (i = 0; i < b->mbs; i++) {
794     for (j = bi[i]; j < bi[i + 1]; j++) {
795       row = i + rstart;
796       col = garray[bj[j]];
797       key = row * Nbs + col + 1;
798       h1  = HASH(ht_size, key, tmp);
799       for (k = 0; k < ht_size; k++) {
800         if (!HT[(h1 + k) % ht_size]) {
801           HT[(h1 + k) % ht_size] = key;
802           HD[(h1 + k) % ht_size] = b->a + j * bs2;
803           break;
804 #if defined(PETSC_USE_INFO)
805         } else {
806           ct++;
807 #endif
808         }
809       }
810 #if defined(PETSC_USE_INFO)
811       if (k > max) max = k;
812 #endif
813     }
814   }
815 
816   /* Print Summary */
817 #if defined(PETSC_USE_INFO)
818   for (i = 0, j = 0; i < ht_size; i++) {
819     if (HT[i]) j++;
820   }
821   PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max));
822 #endif
823   PetscFunctionReturn(0);
824 }
825 
826 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode) {
827   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
828   PetscInt     nstash, reallocs;
829 
830   PetscFunctionBegin;
831   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
832 
833   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
834   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
835   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
836   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
837   PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
838   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
839   PetscFunctionReturn(0);
840 }
841 
/*
  Completes matrix assembly: drains the stashes of off-process entries started
  in MatAssemblyBegin_MPIBAIJ(), assembles the local diagonal (A) and
  off-diagonal (B) blocks, handles collective disassembly, and (optionally)
  installs the hash-table fast paths for MatSetValues.
*/
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode) {
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
  PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt    *row, *col;
  PetscBool    r1, r2, r3, other_disassembled;
  MatScalar   *val;
  PetscMPIInt  n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* drain the point (scalar-entry) stash; each message is a (row,col,val) triple list */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ *)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* each stashed block occupies bs2 scalars, hence the i*bs2 offset */
        PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* first final assembly with MAT_USE_HASH_TABLE set: build the table and
     switch the SetValues ops to the hash-table fast paths */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* release MatGetRow scratch; it is reallocated on demand */
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(0);
}
949 
950 extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer);
951 #include <petscdraw.h>
/*
  Viewer backend for ASCII, draw, and socket viewers.  The ASCII "info"
  formats print per-rank statistics and return early; otherwise the whole
  matrix is gathered onto rank 0 as a temporary MPIBAIJ matrix and viewed
  there through the sequential viewer.
*/
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) {
  Mat_MPIBAIJ      *baij = (Mat_MPIBAIJ *)mat->data;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    /* info formats: print statistics only, never the entries themselves */
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, (double)info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    /* a null draw context means "draw nothing"; skip the expensive gather */
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqBAIJ *Aloc;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
    PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)A));

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      /* rvals = the bs consecutive global scalar row indices of block row i */
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        /* insert the block one column (of bs scalars) at a time */
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        /* garray maps B's local block columns back to global block columns */
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      /* on rank 0 the gathered matrix's diagonal block holds everything */
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)(A->data))->A, matname));
      PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}
1073 
/* Used for both MPIBAIJ and MPISBAIJ matrices */
/*
  Writes the matrix in PETSc binary format: a 4-entry header, the per-scalar-row
  lengths, the global column indices, and the values.  Each scalar row is
  emitted with ascending column indices: off-diagonal (B) blocks whose global
  block column precedes this rank's diagonal column range come first, then the
  diagonal (A) blocks, then the remaining B blocks.
*/
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) {
  Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
  Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt        header[4], M, N, m, rs, cs, bs, nz, cnt, i, j, ja, jb, k, l;
  PetscInt       *rowlens, *colidxs;
  PetscScalar    *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* global sizes, local row/column starts (in scalars), block size,
     and the local scalar nonzero count */
  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs * bs * (A->nz + B->nz);

  /* write matrix header; the global nonzero count is reduced onto rank 0 */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  header[3] = nz;
  PetscCallMPI(MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths; the bs scalar rows of a block row are equal */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (cnt = 0, i = 0; i < A->mbs; i++)
    for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      /* B-blocks to the left of the diagonal column range come first */
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
      }
      /* then the diagonal (A) blocks, shifted by the local column start cs */
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
      /* then the remaining B-blocks (to the right of the diagonal range) */
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values, in the same order as the indices;
     entry (k,l) of block jb is stored at a[bs2*jb + bs*l + k] (column-major) */
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
    }
  }
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(0);
}
1149 
1150 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer) {
1151   PetscBool iascii, isdraw, issocket, isbinary;
1152 
1153   PetscFunctionBegin;
1154   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1155   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1156   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1157   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1158   if (iascii || isdraw || issocket) {
1159     PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
1160   } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
1161   PetscFunctionReturn(0);
1162 }
1163 
/*
  Destroys the parallel matrix: frees both sequential blocks, the column map,
  the communication objects, all scratch arrays, and removes the methods
  composed on the object at creation time.
*/
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) {
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
  /* colmap is a PetscTable or a plain array, depending on the build option */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  /* hd/ht are the MatSetValues hash-table arrays (see MatCreateHashTable_MPIBAIJ_Private) */
  PetscCall(PetscFree2(baij->hd, baij->ht));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  /* detach the composed methods so the object can be retyped cleanly */
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
  PetscFunctionReturn(0);
}
1205 
1206 PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy) {
1207   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1208   PetscInt     nt;
1209 
1210   PetscFunctionBegin;
1211   PetscCall(VecGetLocalSize(xx, &nt));
1212   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
1213   PetscCall(VecGetLocalSize(yy, &nt));
1214   PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible parition of A and yy");
1215   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1216   PetscCall((*a->A->ops->mult)(a->A, xx, yy));
1217   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1218   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
1219   PetscFunctionReturn(0);
1220 }
1221 
/*
  Computes zz = yy + A*xx.  The scatter of the off-process entries of xx is
  overlapped with the local diagonal-block product, as in MatMult_MPIBAIJ;
  the statement order is what implements that overlap.
*/
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) {
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* start communication of the ghost values of xx into a->lvec */
  PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  /* local product zz = yy + A_diag*xx while the scatter is in flight */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  /* add the off-diagonal contribution using the gathered ghost values */
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(0);
}
1232 
/*
  Computes yy = A^T * xx.  The off-diagonal contribution B^T*xx lands in the
  ghost vector and is scattered back additively (reverse scatter) onto yy.
*/
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy) {
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(0);
}
1246 
/*
  Computes zz = yy + A^T * xx, using the same structure as
  MatMultTranspose_MPIBAIJ: the off-diagonal contribution is accumulated in
  the ghost vector and scattered back additively onto zz.
*/
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) {
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(0);
}
1260 
1261 /*
1262   This only works correctly for square matrices where the subblock A->A is the
1263    diagonal block
1264 */
1265 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v) {
1266   PetscFunctionBegin;
1267   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1268   PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
1269   PetscFunctionReturn(0);
1270 }
1271 
1272 PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa) {
1273   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1274 
1275   PetscFunctionBegin;
1276   PetscCall(MatScale(a->A, aa));
1277   PetscCall(MatScale(a->B, aa));
1278   PetscFunctionReturn(0);
1279 }
1280 
/*
  Returns one global scalar row (owned by this rank) with ascending global
  column indices, by merging the rows of the local diagonal (A) and
  off-diagonal (B) blocks.  The returned arrays are internal scratch that is
  reused across calls; callers must pair this with MatRestoreRow.
*/
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;

  PetscFunctionBegin;
  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt     max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      /* tmp = number of nonzero blocks in block row i across A and B */
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }
  /* convert to a local row index */
  lrow = row - brstart;

  /* only request from A and B the arrays the caller actually asked for */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global block column precedes the
         diagonal block; those come first in the merged row */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already computed in the values pass above */
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          /* v was NULL, so find imark here */
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(0);
}
1362 
1363 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
1364   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1365 
1366   PetscFunctionBegin;
1367   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
1368   baij->getrowactive = PETSC_FALSE;
1369   PetscFunctionReturn(0);
1370 }
1371 
1372 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) {
1373   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1374 
1375   PetscFunctionBegin;
1376   PetscCall(MatZeroEntries(l->A));
1377   PetscCall(MatZeroEntries(l->B));
1378   PetscFunctionReturn(0);
1379 }
1380 
1381 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info) {
1382   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
1383   Mat            A = a->A, B = a->B;
1384   PetscLogDouble isend[5], irecv[5];
1385 
1386   PetscFunctionBegin;
1387   info->block_size = (PetscReal)matin->rmap->bs;
1388 
1389   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1390 
1391   isend[0] = info->nz_used;
1392   isend[1] = info->nz_allocated;
1393   isend[2] = info->nz_unneeded;
1394   isend[3] = info->memory;
1395   isend[4] = info->mallocs;
1396 
1397   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1398 
1399   isend[0] += info->nz_used;
1400   isend[1] += info->nz_allocated;
1401   isend[2] += info->nz_unneeded;
1402   isend[3] += info->memory;
1403   isend[4] += info->mallocs;
1404 
1405   if (flag == MAT_LOCAL) {
1406     info->nz_used      = isend[0];
1407     info->nz_allocated = isend[1];
1408     info->nz_unneeded  = isend[2];
1409     info->memory       = isend[3];
1410     info->mallocs      = isend[4];
1411   } else if (flag == MAT_GLOBAL_MAX) {
1412     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1413 
1414     info->nz_used      = irecv[0];
1415     info->nz_allocated = irecv[1];
1416     info->nz_unneeded  = irecv[2];
1417     info->memory       = irecv[3];
1418     info->mallocs      = irecv[4];
1419   } else if (flag == MAT_GLOBAL_SUM) {
1420     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1421 
1422     info->nz_used      = irecv[0];
1423     info->nz_allocated = irecv[1];
1424     info->nz_unneeded  = irecv[2];
1425     info->memory       = irecv[3];
1426     info->mallocs      = irecv[4];
1427   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1428   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1429   info->fill_ratio_needed = 0;
1430   info->factor_mallocs    = 0;
1431   PetscFunctionReturn(0);
1432 }
1433 
1434 PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg) {
1435   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1436 
1437   PetscFunctionBegin;
1438   switch (op) {
1439   case MAT_NEW_NONZERO_LOCATIONS:
1440   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1441   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1442   case MAT_KEEP_NONZERO_PATTERN:
1443   case MAT_NEW_NONZERO_LOCATION_ERR:
1444     MatCheckPreallocated(A, 1);
1445     PetscCall(MatSetOption(a->A, op, flg));
1446     PetscCall(MatSetOption(a->B, op, flg));
1447     break;
1448   case MAT_ROW_ORIENTED:
1449     MatCheckPreallocated(A, 1);
1450     a->roworiented = flg;
1451 
1452     PetscCall(MatSetOption(a->A, op, flg));
1453     PetscCall(MatSetOption(a->B, op, flg));
1454     break;
1455   case MAT_FORCE_DIAGONAL_ENTRIES:
1456   case MAT_SORTED_FULL: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); break;
1457   case MAT_IGNORE_OFF_PROC_ENTRIES: a->donotstash = flg; break;
1458   case MAT_USE_HASH_TABLE:
1459     a->ht_flag = flg;
1460     a->ht_fact = 1.39;
1461     break;
1462   case MAT_SYMMETRIC:
1463   case MAT_STRUCTURALLY_SYMMETRIC:
1464   case MAT_HERMITIAN:
1465   case MAT_SUBMAT_SINGLEIS:
1466   case MAT_SYMMETRY_ETERNAL:
1467   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1468   case MAT_SPD_ETERNAL:
1469     /* if the diagonal matrix is square it inherits some of the properties above */
1470     break;
1471   default: SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op);
1472   }
1473   PetscFunctionReturn(0);
1474 }
1475 
/*
  Forms the transpose by scattering each local bs-by-bs block into the target
  matrix with row and column roles exchanged.  With MAT_INPLACE_MATRIX the
  result replaces A via a header merge.
*/
PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout) {
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *Aloc;
  Mat          B;
  PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
  PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
  MatScalar   *a;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* create the result with swapped row/column layouts */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    /* Do not know preallocation information, but must set block size */
    PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ *)baij->A->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  PetscCall(PetscMalloc1(bs, &rvals));

  for (i = 0; i < mbs; i++) {
    /* rvals = the bs consecutive global scalar row indices of block row i */
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      col = (baij->cstartbs + aj[j]) * bs;
      for (k = 0; k < bs; k++) {
        /* insert one column of the block (bs values starting at a) as row
           'col' of B with column indices rvals: this transposes the block */
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));

        col++;
        a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ *)baij->B->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      /* garray maps local off-diagonal block columns to global block columns */
      col = baij->garray[aj[j]] * bs;
      for (k = 0; k < bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
        col++;
        a += bs;
      }
    }
  }
  PetscCall(PetscFree(rvals));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else PetscCall(MatHeaderMerge(A, &B));
  PetscFunctionReturn(0);
}
1541 
1542 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr) {
1543   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1544   Mat          a = baij->A, b = baij->B;
1545   PetscInt     s1, s2, s3;
1546 
1547   PetscFunctionBegin;
1548   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1549   if (rr) {
1550     PetscCall(VecGetLocalSize(rr, &s1));
1551     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1552     /* Overlap communication with computation. */
1553     PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1554   }
1555   if (ll) {
1556     PetscCall(VecGetLocalSize(ll, &s1));
1557     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1558     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1559   }
1560   /* scale  the diagonal block */
1561   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1562 
1563   if (rr) {
1564     /* Do a scatter end and then right scale the off-diagonal block */
1565     PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1566     PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1567   }
1568   PetscFunctionReturn(0);
1569 }
1570 
/*
   Zeros the (globally numbered) rows listed in rows[], optionally placing
   'diag' on the diagonal of each zeroed row, and — when both x and b are
   given — fixing the right-hand side so that b[row] = diag * x[row].
*/
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) {
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
  PetscInt    *lrows;
  PetscInt     r, len;
  PetscBool    cong;

  PetscFunctionBegin;
  /* get locally owned rows (lrows[] holds local indices; rows[] may name rows owned elsewhere) */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  if ((diag != 0.0) && cong) {
    /* congruent row/column layouts: the SeqBAIJ routine sets the diagonal itself */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
  } else if (diag != 0.0) {
    /* incongruent layouts: zero first, then insert diagonal entries one by one */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
    PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
       MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    /* the insertions above may be off-process, so a full parallel assembly is required */
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  } else {
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}
1627 
/*
   Zeros the listed global rows AND the matching columns of A, optionally
   placing 'diag' on the diagonal of the zeroed rows and adjusting b using the
   known solution values x of the eliminated unknowns.  Any rank may list any
   global row; a star forest (SF) routes each request to the owning rank.
*/
PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) {
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n, p = 0;
  PetscInt           i, j, k, r, len = 0, row, col, count;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask;
  Vec                xmask, lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1; /* -1 = this local row not requested (yet) */
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off diagonal part of matrix */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  /* mark zeroed rows in xmask, then scatter so lmask flags the ghost columns to eliminate */
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x) {
    /* ghost values of x are needed to move the known terms onto the right-hand side */
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off diagonal matrix */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; /* scalars of this point row within block row row/bs */
    aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa += bs; /* stride bs: successive entries of one point row inside the bs x bs blocks */
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i / bs;
    for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs * baij->j[j] + k; /* compressed (ghost) point column index */
        if (PetscAbsScalar(mask[col])) {
          aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k;
          if (x) bb[i] -= aa[0] * xx[col]; /* move known contribution to the rhs before zeroing */
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}
1722 
1723 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) {
1724   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1725 
1726   PetscFunctionBegin;
1727   PetscCall(MatSetUnfactored(a->A));
1728   PetscFunctionReturn(0);
1729 }
1730 
1731 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);
1732 
1733 PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag) {
1734   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
1735   Mat          a, b, c, d;
1736   PetscBool    flg;
1737 
1738   PetscFunctionBegin;
1739   a = matA->A;
1740   b = matA->B;
1741   c = matB->A;
1742   d = matB->B;
1743 
1744   PetscCall(MatEqual(a, c, &flg));
1745   if (flg) PetscCall(MatEqual(b, d, &flg));
1746   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1747   PetscFunctionReturn(0);
1748 }
1749 
1750 PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str) {
1751   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1752   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
1753 
1754   PetscFunctionBegin;
1755   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1756   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1757     PetscCall(MatCopy_Basic(A, B, str));
1758   } else {
1759     PetscCall(MatCopy(a->A, b->A, str));
1760     PetscCall(MatCopy(a->B, b->B, str));
1761   }
1762   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1763   PetscFunctionReturn(0);
1764 }
1765 
1766 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) {
1767   PetscFunctionBegin;
1768   PetscCall(MatMPIBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL));
1769   PetscFunctionReturn(0);
1770 }
1771 
1772 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) {
1773   PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs;
1774   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
1775   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
1776 
1777   PetscFunctionBegin;
1778   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
1779   PetscFunctionReturn(0);
1780 }
1781 
/*
   Computes Y = a*X + Y.  With SAME_NONZERO_PATTERN the scalar arrays of the
   diagonal and off-diagonal blocks are combined directly with BLAS axpy; with
   SUBSET_NONZERO_PATTERN the generic implementation is used; otherwise a new
   matrix preallocated for the merged pattern is built and merged into Y.
*/
PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) {
  Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
  PetscBLASInt bnz, one                         = 1;
  Mat_SeqBAIJ *x, *y;
  PetscInt     bs2 = Y->rmap->bs * Y->rmap->bs;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x                 = (Mat_SeqBAIJ *)xx->A->data;
    y                 = (Mat_SeqBAIJ *)yy->A->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); /* nz counts blocks; bs2 scalars per block */
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    x = (Mat_SeqBAIJ *)xx->B->data;
    y = (Mat_SeqBAIJ *)yy->B->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    /* unknown relationship between patterns: preallocate for the union, then merge into Y */
    Mat       B;
    PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
    PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
    PetscCall(MatSetType(B, MATMPIBAIJ));
    PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
    /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    PetscCall(MatHeaderMerge(Y, &B)); /* Y takes over B's data; B ceases to exist */
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(0);
}
1823 
1824 PetscErrorCode MatConjugate_MPIBAIJ(Mat mat) {
1825   PetscFunctionBegin;
1826   if (PetscDefined(USE_COMPLEX)) {
1827     Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;
1828 
1829     PetscCall(MatConjugate_SeqBAIJ(a->A));
1830     PetscCall(MatConjugate_SeqBAIJ(a->B));
1831   }
1832   PetscFunctionReturn(0);
1833 }
1834 
1835 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) {
1836   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1837 
1838   PetscFunctionBegin;
1839   PetscCall(MatRealPart(a->A));
1840   PetscCall(MatRealPart(a->B));
1841   PetscFunctionReturn(0);
1842 }
1843 
1844 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) {
1845   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1846 
1847   PetscFunctionBegin;
1848   PetscCall(MatImaginaryPart(a->A));
1849   PetscCall(MatImaginaryPart(a->B));
1850   PetscFunctionReturn(0);
1851 }
1852 
1853 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) {
1854   IS       iscol_local;
1855   PetscInt csize;
1856 
1857   PetscFunctionBegin;
1858   PetscCall(ISGetLocalSize(iscol, &csize));
1859   if (call == MAT_REUSE_MATRIX) {
1860     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1861     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1862   } else {
1863     PetscCall(ISAllGather(iscol, &iscol_local));
1864   }
1865   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat));
1866   if (call == MAT_INITIAL_MATRIX) {
1867     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1868     PetscCall(ISDestroy(&iscol_local));
1869   }
1870   PetscFunctionReturn(0);
1871 }
1872 
1873 /*
1874   Not great since it makes two copies of the submatrix, first an SeqBAIJ
1875   in local and then by concatenating the local matrices the end result.
1876   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1877   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
1878 */
/*
   Builds a distributed submatrix: each rank first extracts its requested rows
   as a sequential matrix (Mreuse), then the pieces are concatenated into the
   parallel result M with the column-ownership split given by csize (or chosen
   here when csize == PETSC_DECIDE).
*/
PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) {
  PetscMPIInt  rank, size;
  PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
  PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat          M, Mreuse;
  MatScalar   *vwork, *aa;
  MPI_Comm     comm;
  IS           isrow_new, iscol_new;
  Mat_SeqBAIJ *aij;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* The compression and expansion should be avoided. Doesn't point
     out errors, might change the indices, hence buggy */
  PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
  PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));

  /* extract the requested rows/columns as a sequential matrix on each rank */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse));
  }
  PetscCall(ISDestroy(&isrow_new));
  PetscCall(ISDestroy(&iscol_new));
  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetSize(Mreuse, &m, &n));
  m = m / bs; /* convert point counts to block counts */
  n = n / bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n * bs) { /* square matrix */
        nlocal = m;
      } else {
        /* spread the n block columns as evenly as possible over the ranks */
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize / bs;
    }
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal; /* [rstart,rend) = block columns owned by this rank */
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    /* this routine serves both BAIJ and SBAIJ; only the preallocation matching M's type takes effect */
    PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(PetscFree2(dlens, olens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    /* NOTE(review): ml is a point-row count while m is in block rows here — confirm intended */
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqBAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  /* copy Mreuse into M one block row at a time */
  for (i = 0; i < m; i++) {
    row   = rstart / bs + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj += nz;
    vwork = aa;
    aa += nz * bs * bs;
    PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(PetscObjectDereference((PetscObject)Mreuse));
  }
  PetscFunctionReturn(0);
}
2003 
2004 PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B) {
2005   MPI_Comm        comm, pcomm;
2006   PetscInt        clocal_size, nrows;
2007   const PetscInt *rows;
2008   PetscMPIInt     size;
2009   IS              crowp, lcolp;
2010 
2011   PetscFunctionBegin;
2012   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2013   /* make a collective version of 'rowp' */
2014   PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
2015   if (pcomm == comm) {
2016     crowp = rowp;
2017   } else {
2018     PetscCall(ISGetSize(rowp, &nrows));
2019     PetscCall(ISGetIndices(rowp, &rows));
2020     PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
2021     PetscCall(ISRestoreIndices(rowp, &rows));
2022   }
2023   PetscCall(ISSetPermutation(crowp));
2024   /* make a local version of 'colp' */
2025   PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
2026   PetscCallMPI(MPI_Comm_size(pcomm, &size));
2027   if (size == 1) {
2028     lcolp = colp;
2029   } else {
2030     PetscCall(ISAllGather(colp, &lcolp));
2031   }
2032   PetscCall(ISSetPermutation(lcolp));
2033   /* now we just get the submatrix */
2034   PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
2035   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B));
2036   /* clean up */
2037   if (pcomm != comm) PetscCall(ISDestroy(&crowp));
2038   if (size > 1) PetscCall(ISDestroy(&lcolp));
2039   PetscFunctionReturn(0);
2040 }
2041 
2042 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) {
2043   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
2044   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
2045 
2046   PetscFunctionBegin;
2047   if (nghosts) *nghosts = B->nbs;
2048   if (ghosts) *ghosts = baij->garray;
2049   PetscFunctionReturn(0);
2050 }
2051 
/*
   Builds, on every rank, a sequential AIJ matrix describing the nonzero
   structure of the entire matrix in block-compressed form (one row/column per
   bs x bs block).  Only the pattern is meaningful; numerical values are never
   set.
*/
PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat) {
  Mat          B;
  Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
  Mat_SeqAIJ  *b;
  PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
  PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
  PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));

  /* ----------------------------------------------------------------
     Tell every processor the number of nonzeros per row
  */
  PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
  for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
  PetscCall(PetscMalloc1(2 * size, &recvcounts));
  displs = recvcounts + size; /* one allocation serves both MPI argument arrays */
  for (i = 0; i < size; i++) {
    recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs;
    displs[i]     = A->rmap->range[i] / bs;
  }
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /* ---------------------------------------------------------------
     Create the sequential matrix of the same type as the local block diagonal
  */
  PetscCall(MatCreate(PETSC_COMM_SELF, &B));
  PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(B, MATSEQAIJ));
  PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
  b = (Mat_SeqAIJ *)B->data;

  /*--------------------------------------------------------------------
    Copy my part of matrix column indices over
  */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank] / bs]; /* fill directly into this rank's slice of b->j */
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend / bs - A->rmap->rstart / bs;
  cnt        = 0;
  /* merge bd entries left of the diagonal, then ad, then the remaining bd
     entries, translating everything to global block-column numbering
     (assumes ad->j/bd->j are column-sorted within each row) */
  for (i = 0; i < n; i++) {
    /* put in lower diagonal portion */
    m = bd->i[i + 1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;

    /* put in upper diagonal portion */
    while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
  }
  PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);

  /*--------------------------------------------------------------------
    Gather all column indices to all processors
  */
  for (i = 0; i < size; i++) {
    recvcounts[i] = 0;
    for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
  }
  displs[0] = 0;
  for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /*--------------------------------------------------------------------
    Assemble the matrix into useable form (note numerical values not yet set)
  */
  /* set the b->ilen (length of each row) values */
  PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
  /* set the b->i indices */
  b->i[0] = 0;
  for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
  PetscCall(PetscFree(lens));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscFree(recvcounts));

  PetscCall(MatPropagateSymmetryOptions(A, B));
  *newmat = B;
  PetscFunctionReturn(0);
}
2140 
/*
   SOR/Gauss-Seidel relaxation.  In parallel only the "local" sweep variants
   are supported: each outer iteration scatters the current ghost values of
   xx, forms bb1 = bb - B*x (moving the off-process coupling onto the
   right-hand side), and runs the sequential SOR kernel on the diagonal block.
*/
PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) {
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  Vec          bb1 = NULL;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(0);
  }

  /* bb1 is needed unless a single sweep starts from a zero initial guess */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* x starts at zero, so no ghost contribution yet: sweep directly on bb */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported");

  PetscCall(VecDestroy(&bb1));
  PetscFunctionReturn(0);
}
2207 
2208 PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions) {
2209   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data;
2210   PetscInt     m, N, i, *garray = aij->garray;
2211   PetscInt     ib, jb, bs = A->rmap->bs;
2212   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data;
2213   MatScalar   *a_val = a_aij->a;
2214   Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data;
2215   MatScalar   *b_val = b_aij->a;
2216   PetscReal   *work;
2217 
2218   PetscFunctionBegin;
2219   PetscCall(MatGetSize(A, &m, &N));
2220   PetscCall(PetscCalloc1(N, &work));
2221   if (type == NORM_2) {
2222     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2223       for (jb = 0; jb < bs; jb++) {
2224         for (ib = 0; ib < bs; ib++) {
2225           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2226           a_val++;
2227         }
2228       }
2229     }
2230     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2231       for (jb = 0; jb < bs; jb++) {
2232         for (ib = 0; ib < bs; ib++) {
2233           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2234           b_val++;
2235         }
2236       }
2237     }
2238   } else if (type == NORM_1) {
2239     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2240       for (jb = 0; jb < bs; jb++) {
2241         for (ib = 0; ib < bs; ib++) {
2242           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2243           a_val++;
2244         }
2245       }
2246     }
2247     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2248       for (jb = 0; jb < bs; jb++) {
2249         for (ib = 0; ib < bs; ib++) {
2250           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2251           b_val++;
2252         }
2253       }
2254     }
2255   } else if (type == NORM_INFINITY) {
2256     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2257       for (jb = 0; jb < bs; jb++) {
2258         for (ib = 0; ib < bs; ib++) {
2259           int col   = A->cmap->rstart + a_aij->j[i] * bs + jb;
2260           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2261           a_val++;
2262         }
2263       }
2264     }
2265     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2266       for (jb = 0; jb < bs; jb++) {
2267         for (ib = 0; ib < bs; ib++) {
2268           int col   = garray[b_aij->j[i]] * bs + jb;
2269           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2270           b_val++;
2271         }
2272       }
2273     }
2274   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
2275     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2276       for (jb = 0; jb < bs; jb++) {
2277         for (ib = 0; ib < bs; ib++) {
2278           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
2279           a_val++;
2280         }
2281       }
2282     }
2283     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2284       for (jb = 0; jb < bs; jb++) {
2285         for (ib = 0; ib < bs; ib++) {
2286           work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
2287           b_val++;
2288         }
2289       }
2290     }
2291   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2292     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2293       for (jb = 0; jb < bs; jb++) {
2294         for (ib = 0; ib < bs; ib++) {
2295           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
2296           a_val++;
2297         }
2298       }
2299     }
2300     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2301       for (jb = 0; jb < bs; jb++) {
2302         for (ib = 0; ib < bs; ib++) {
2303           work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
2304           b_val++;
2305         }
2306       }
2307     }
2308   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
2309   if (type == NORM_INFINITY) {
2310     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
2311   } else {
2312     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
2313   }
2314   PetscCall(PetscFree(work));
2315   if (type == NORM_2) {
2316     for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2317   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2318     for (i = 0; i < N; i++) reductions[i] /= m;
2319   }
2320   PetscFunctionReturn(0);
2321 }
2322 
2323 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values) {
2324   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2325 
2326   PetscFunctionBegin;
2327   PetscCall(MatInvertBlockDiagonal(a->A, values));
2328   A->factorerrortype             = a->A->factorerrortype;
2329   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2330   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2331   PetscFunctionReturn(0);
2332 }
2333 
2334 PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a) {
2335   Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
2336   Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ *)maij->A->data;
2337 
2338   PetscFunctionBegin;
2339   if (!Y->preallocated) {
2340     PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
2341   } else if (!aij->nz) {
2342     PetscInt nonew = aij->nonew;
2343     PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
2344     aij->nonew = nonew;
2345   }
2346   PetscCall(MatShift_Basic(Y, a));
2347   PetscFunctionReturn(0);
2348 }
2349 
2350 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d) {
2351   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2352 
2353   PetscFunctionBegin;
2354   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2355   PetscCall(MatMissingDiagonal(a->A, missing, d));
2356   if (d) {
2357     PetscInt rstart;
2358     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2359     *d += rstart / A->rmap->bs;
2360   }
2361   PetscFunctionReturn(0);
2362 }
2363 
2364 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a) {
2365   PetscFunctionBegin;
2366   *a = ((Mat_MPIBAIJ *)A->data)->A;
2367   PetscFunctionReturn(0);
2368 }
2369 
2370 /* -------------------------------------------------------------------*/
2371 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2372                                        MatGetRow_MPIBAIJ,
2373                                        MatRestoreRow_MPIBAIJ,
2374                                        MatMult_MPIBAIJ,
2375                                        /* 4*/ MatMultAdd_MPIBAIJ,
2376                                        MatMultTranspose_MPIBAIJ,
2377                                        MatMultTransposeAdd_MPIBAIJ,
2378                                        NULL,
2379                                        NULL,
2380                                        NULL,
2381                                        /*10*/ NULL,
2382                                        NULL,
2383                                        NULL,
2384                                        MatSOR_MPIBAIJ,
2385                                        MatTranspose_MPIBAIJ,
2386                                        /*15*/ MatGetInfo_MPIBAIJ,
2387                                        MatEqual_MPIBAIJ,
2388                                        MatGetDiagonal_MPIBAIJ,
2389                                        MatDiagonalScale_MPIBAIJ,
2390                                        MatNorm_MPIBAIJ,
2391                                        /*20*/ MatAssemblyBegin_MPIBAIJ,
2392                                        MatAssemblyEnd_MPIBAIJ,
2393                                        MatSetOption_MPIBAIJ,
2394                                        MatZeroEntries_MPIBAIJ,
2395                                        /*24*/ MatZeroRows_MPIBAIJ,
2396                                        NULL,
2397                                        NULL,
2398                                        NULL,
2399                                        NULL,
2400                                        /*29*/ MatSetUp_MPIBAIJ,
2401                                        NULL,
2402                                        NULL,
2403                                        MatGetDiagonalBlock_MPIBAIJ,
2404                                        NULL,
2405                                        /*34*/ MatDuplicate_MPIBAIJ,
2406                                        NULL,
2407                                        NULL,
2408                                        NULL,
2409                                        NULL,
2410                                        /*39*/ MatAXPY_MPIBAIJ,
2411                                        MatCreateSubMatrices_MPIBAIJ,
2412                                        MatIncreaseOverlap_MPIBAIJ,
2413                                        MatGetValues_MPIBAIJ,
2414                                        MatCopy_MPIBAIJ,
2415                                        /*44*/ NULL,
2416                                        MatScale_MPIBAIJ,
2417                                        MatShift_MPIBAIJ,
2418                                        NULL,
2419                                        MatZeroRowsColumns_MPIBAIJ,
2420                                        /*49*/ NULL,
2421                                        NULL,
2422                                        NULL,
2423                                        NULL,
2424                                        NULL,
2425                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2426                                        NULL,
2427                                        MatSetUnfactored_MPIBAIJ,
2428                                        MatPermute_MPIBAIJ,
2429                                        MatSetValuesBlocked_MPIBAIJ,
2430                                        /*59*/ MatCreateSubMatrix_MPIBAIJ,
2431                                        MatDestroy_MPIBAIJ,
2432                                        MatView_MPIBAIJ,
2433                                        NULL,
2434                                        NULL,
2435                                        /*64*/ NULL,
2436                                        NULL,
2437                                        NULL,
2438                                        NULL,
2439                                        NULL,
2440                                        /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2441                                        NULL,
2442                                        NULL,
2443                                        NULL,
2444                                        NULL,
2445                                        /*74*/ NULL,
2446                                        MatFDColoringApply_BAIJ,
2447                                        NULL,
2448                                        NULL,
2449                                        NULL,
2450                                        /*79*/ NULL,
2451                                        NULL,
2452                                        NULL,
2453                                        NULL,
2454                                        MatLoad_MPIBAIJ,
2455                                        /*84*/ NULL,
2456                                        NULL,
2457                                        NULL,
2458                                        NULL,
2459                                        NULL,
2460                                        /*89*/ NULL,
2461                                        NULL,
2462                                        NULL,
2463                                        NULL,
2464                                        NULL,
2465                                        /*94*/ NULL,
2466                                        NULL,
2467                                        NULL,
2468                                        NULL,
2469                                        NULL,
2470                                        /*99*/ NULL,
2471                                        NULL,
2472                                        NULL,
2473                                        MatConjugate_MPIBAIJ,
2474                                        NULL,
2475                                        /*104*/ NULL,
2476                                        MatRealPart_MPIBAIJ,
2477                                        MatImaginaryPart_MPIBAIJ,
2478                                        NULL,
2479                                        NULL,
2480                                        /*109*/ NULL,
2481                                        NULL,
2482                                        NULL,
2483                                        NULL,
2484                                        MatMissingDiagonal_MPIBAIJ,
2485                                        /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ,
2486                                        NULL,
2487                                        MatGetGhosts_MPIBAIJ,
2488                                        NULL,
2489                                        NULL,
2490                                        /*119*/ NULL,
2491                                        NULL,
2492                                        NULL,
2493                                        NULL,
2494                                        MatGetMultiProcBlock_MPIBAIJ,
2495                                        /*124*/ NULL,
2496                                        MatGetColumnReductions_MPIBAIJ,
2497                                        MatInvertBlockDiagonal_MPIBAIJ,
2498                                        NULL,
2499                                        NULL,
2500                                        /*129*/ NULL,
2501                                        NULL,
2502                                        NULL,
2503                                        NULL,
2504                                        NULL,
2505                                        /*134*/ NULL,
2506                                        NULL,
2507                                        NULL,
2508                                        NULL,
2509                                        NULL,
2510                                        /*139*/ MatSetBlockSizes_Default,
2511                                        NULL,
2512                                        NULL,
2513                                        MatFDColoringSetUp_MPIXAIJ,
2514                                        NULL,
2515                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
2516                                        NULL,
2517                                        NULL,
2518                                        NULL,
2519                                        NULL,
2520                                        NULL,
2521                                        /*150*/ NULL};
2522 
2523 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
2524 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
2525 
/* Type-specific implementation of MatMPIBAIJSetPreallocationCSR(): preallocates B from
   block CSR arrays (ii, jj) given in global block indices for the locally owned rows,
   then inserts the (optional) values V and assembles the matrix. */
PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) {
  PetscInt        m, rstart, cstart, cend;
  PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
  const PetscInt *JJ          = NULL;
  PetscScalar    *values      = NULL;
  PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
  PetscBool       nooffprocentries;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
  PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
  /* all sizes and offsets below are in units of blocks, not points */
  m      = B->rmap->n / bs;
  rstart = B->rmap->rstart / bs;
  cstart = B->cmap->rstart / bs;
  cend   = B->cmap->rend / bs;

  PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
  PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
  /* first pass: for each local block row, count block columns falling in the diagonal
     portion [cstart, cend) versus the off-diagonal portion */
  for (i = 0; i < m; i++) {
    nz = ii[i + 1] - ii[i];
    PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
    nz_max = PetscMax(nz_max, nz);
    dlen   = 0;
    olen   = 0;
    JJ     = jj + ii[i];
    for (j = 0; j < nz; j++) {
      if (*JJ < cstart || *JJ >= cend) olen++;
      else dlen++;
      JJ++;
    }
    d_nnz[i] = dlen;
    o_nnz[i] = olen;
  }
  PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  values = (PetscScalar *)V;
  /* if no values were supplied, insert explicit zeros so the nonzero pattern still gets created */
  if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
  /* second pass: insert values block row by block row */
  for (i = 0; i < m; i++) {
    PetscInt        row   = i + rstart;
    PetscInt        ncols = ii[i + 1] - ii[i];
    const PetscInt *icols = jj + ii[i];
    if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
      PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
    } else { /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j = 0; j < ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
        PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
      }
    }
  }

  if (!V) PetscCall(PetscFree(values));
  /* the CSR arrays describe only locally owned rows, so assembly needs no off-process stash */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* the CSR input defines the exact nonzero structure; error on any new location */
  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}
2593 
2594 /*@C
2595    MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values
2596 
2597    Collective
2598 
2599    Input Parameters:
2600 +  B - the matrix
2601 .  bs - the block size
2602 .  i - the indices into j for the start of each local row (starts with zero)
2603 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2604 -  v - optional values in the matrix
2605 
2606    Level: advanced
2607 
2608    Notes:
2609     The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
2610    may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
2611    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2612    `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2613    block column and the second index is over columns within a block.
2614 
2615    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2616 
2617 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MPIBAIJ`
2618 @*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2); /* bs must agree across all ranks */
  /* dispatch to the type-specific implementation composed on B, if the type provides one */
  PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
  PetscFunctionReturn(0);
}
2627 
2628 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) {
2629   Mat_MPIBAIJ *b;
2630   PetscInt     i;
2631   PetscMPIInt  size;
2632 
2633   PetscFunctionBegin;
2634   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
2635   PetscCall(PetscLayoutSetUp(B->rmap));
2636   PetscCall(PetscLayoutSetUp(B->cmap));
2637   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2638 
2639   if (d_nnz) {
2640     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
2641   }
2642   if (o_nnz) {
2643     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
2644   }
2645 
2646   b      = (Mat_MPIBAIJ *)B->data;
2647   b->bs2 = bs * bs;
2648   b->mbs = B->rmap->n / bs;
2649   b->nbs = B->cmap->n / bs;
2650   b->Mbs = B->rmap->N / bs;
2651   b->Nbs = B->cmap->N / bs;
2652 
2653   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
2654   b->rstartbs = B->rmap->rstart / bs;
2655   b->rendbs   = B->rmap->rend / bs;
2656   b->cstartbs = B->cmap->rstart / bs;
2657   b->cendbs   = B->cmap->rend / bs;
2658 
2659 #if defined(PETSC_USE_CTABLE)
2660   PetscCall(PetscTableDestroy(&b->colmap));
2661 #else
2662   PetscCall(PetscFree(b->colmap));
2663 #endif
2664   PetscCall(PetscFree(b->garray));
2665   PetscCall(VecDestroy(&b->lvec));
2666   PetscCall(VecScatterDestroy(&b->Mvctx));
2667 
2668   /* Because the B will have been resized we simply destroy it and create a new one each time */
2669   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2670   PetscCall(MatDestroy(&b->B));
2671   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2672   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2673   PetscCall(MatSetType(b->B, MATSEQBAIJ));
2674   PetscCall(PetscLogObjectParent((PetscObject)B, (PetscObject)b->B));
2675 
2676   if (!B->preallocated) {
2677     PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2678     PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2679     PetscCall(MatSetType(b->A, MATSEQBAIJ));
2680     PetscCall(PetscLogObjectParent((PetscObject)B, (PetscObject)b->A));
2681     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
2682   }
2683 
2684   PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
2685   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
2686   B->preallocated  = PETSC_TRUE;
2687   B->was_assembled = PETSC_FALSE;
2688   B->assembled     = PETSC_FALSE;
2689   PetscFunctionReturn(0);
2690 }
2691 
2692 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec);
2693 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal);
2694 
/* Convert a MATMPIBAIJ matrix to MATMPIADJ (block-row adjacency graph).
   For each local block row the adjacency list merges: off-diagonal columns that come
   before this rank's diagonal block, then the diagonal-block columns (skipping the
   diagonal entry itself, which MPIAdj does not store), then the remaining
   off-diagonal columns — producing a sorted list of global block columns.
   newtype and reuse are part of the MatConvert interface but are unused here. */
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj) {
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
  Mat_SeqBAIJ    *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data;
  PetscInt        M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  /* count entries per block row (diagonal + off-diagonal, minus the diagonal entry) */
  PetscCall(PetscMalloc1(M + 1, &ii));
  ii[0] = 0;
  for (i = 0; i < M; i++) {
    PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]);
    PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]);
    ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i];
    /* remove one from count of matrix has diagonal */
    for (j = id[i]; j < id[i + 1]; j++) {
      if (jd[j] == i) {
        ii[i + 1]--;
        break;
      }
    }
  }
  PetscCall(PetscMalloc1(ii[M], &jj));
  cnt = 0;
  for (i = 0; i < M; i++) {
    /* off-diagonal block columns that precede this rank's diagonal block */
    for (j = io[i]; j < io[i + 1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
    }
    /* diagonal-block columns, shifted to global numbering; skip the diagonal entry */
    for (k = id[i]; k < id[i + 1]; k++) {
      if (jd[k] != i) jj[cnt++] = rstart + jd[k];
    }
    /* remaining off-diagonal block columns (continues from where the first loop broke) */
    for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]];
  }
  /* MatCreateMPIAdj takes ownership of ii and jj, so they are not freed here */
  PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj));
  PetscFunctionReturn(0);
}
2731 
2732 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2733 
2734 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
2735 
/* Convert a MATMPIBAIJ matrix to MATMPIAIJ by converting its sequential diagonal (a->A)
   and off-diagonal (a->B) pieces.  For the non-reuse paths the source is first
   disassembled (off-diagonal piece expanded to global column numbering) and
   reassembled afterwards, restoring A to its original state. */
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  Mat_MPIAIJ  *b;
  Mat          B;

  PetscFunctionBegin;
  PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    /* create an empty MPIAIJ matrix with the same layout to receive the converted pieces */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetType(B, MATMPIAIJ));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
    PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
  }
  b = (Mat_MPIAIJ *)B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
  } else {
    PetscCall(MatDestroy(&b->A));
    PetscCall(MatDestroy(&b->B));
    /* disassemble so a->B uses global column numbering; reassembly below restores A */
    PetscCall(MatDisAssemble_MPIBAIJ(A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  }
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    /* replace A's contents with B, destroying the old BAIJ data */
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(0);
}
2778 
2779 /*MC
2780    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2781 
2782    Options Database Keys:
2783 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
2784 . -mat_block_size <bs> - set the blocksize used to store the matrix
2785 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use  (0 often indicates using BLAS)
2786 - -mat_use_hash_table <fact> - set hash table factor
2787 
2788    Level: beginner
2789 
2790    Note:
    `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
2793 
.seealso: `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ()`
2795 M*/
2796 
2797 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);
2798 
/* MATMPIBAIJ type constructor: installs the operations table, creates the assembly
   stashes, zero-initializes the per-type data, registers conversion and preallocation
   methods by name, and processes the hash-table option. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) {
  Mat_MPIBAIJ *b;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscNewLog(B, &b));
  B->data = (void *)b;

  PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));

  /* build local table of row and column ownerships */
  PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  /* register conversion and type-specific methods so they can be dispatched by name */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));

  PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
  PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
  if (flg) {
    PetscReal fact = 1.39;
    PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
    PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
    /* factors <= 1.0 would under-size the table; fall back to the default */
    if (fact <= 1.0) fact = 1.39;
    PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
    PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
  }
  PetscOptionsEnd();
  PetscFunctionReturn(0);
}
2877 
2878 /*MC
2879    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2880 
2881    This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
2882    and `MATMPIBAIJ` otherwise.
2883 
2884    Options Database Keys:
2885 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`
2886 
2887   Level: beginner
2888 
2889 .seealso: `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
2890 M*/
2891 
2892 /*@C
2893    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
2894    (block compressed row).  For good matrix assembly performance
2895    the user should preallocate the matrix storage by setting the parameters
2896    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2897    performance can be increased by more than a factor of 50.
2898 
2899    Collective on Mat
2900 
2901    Input Parameters:
2902 +  B - the matrix
2903 .  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2904           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2905 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2906            submatrix  (same for all local rows)
2907 .  d_nnz - array containing the number of block nonzeros in the various block rows
2908            of the in diagonal portion of the local (possibly different for each block
2909            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
2910            set it even if it is zero.
2911 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2912            submatrix (same for all local rows).
2913 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2914            off-diagonal portion of the local submatrix (possibly different for
2915            each block row) or NULL.
2916 
2917    If the *_nnz parameter is given then the *_nz parameter is ignored
2918 
2919    Options Database Keys:
2920 +   -mat_block_size - size of the blocks to use
2921 -   -mat_use_hash_table <fact> - set hash table factor
2922 
2923    Notes:
2924    If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
2926 
2927    Storage Information:
2928    For a square global matrix we define each processor's diagonal portion
2929    to be its local rows and the corresponding columns (a square submatrix);
2930    each processor's off-diagonal portion encompasses the remainder of the
2931    local matrix (a rectangular submatrix).
2932 
2933    The user can specify preallocated storage for the diagonal part of
2934    the local submatrix with either d_nz or d_nnz (not both).  Set
2935    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2936    memory allocation.  Likewise, specify preallocated storage for the
2937    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2938 
2939    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2940    the figure below we depict these three local rows and all columns (0-11).
2941 
2942 .vb
2943            0 1 2 3 4 5 6 7 8 9 10 11
2944           --------------------------
2945    row 3  |o o o d d d o o o o  o  o
2946    row 4  |o o o d d d o o o o  o  o
2947    row 5  |o o o d d d o o o o  o  o
2948           --------------------------
2949 .ve
2950 
2951    Thus, any entries in the d locations are stored in the d (diagonal)
2952    submatrix, and any entries in the o locations are stored in the
2953    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2954    stored simply in the `MATSEQBAIJ` format for compressed row storage.
2955 
2956    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2957    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2958    In general, for PDE problems in which most nonzeros are near the diagonal,
2959    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2960    or you will get TERRIBLE performance; see the users' manual chapter on
2961    matrices.
2962 
2963    You can call `MatGetInfo()` to get information on how effective the preallocation was;
2964    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2965    You can also run with the option -info and look for messages with the string
2966    malloc in them to see if additional memory allocation was needed.
2967 
2968    Level: intermediate
2969 
2970 .seealso: `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
2971 @*/
2972 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) {
2973   PetscFunctionBegin;
2974   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2975   PetscValidType(B, 1);
2976   PetscValidLogicalCollectiveInt(B, bs, 2);
2977   PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
2978   PetscFunctionReturn(0);
2979 }
2980 
2981 /*@C
2982    MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
2983    (block compressed row).  For good matrix assembly performance
2984    the user should preallocate the matrix storage by setting the parameters
2985    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2986    performance can be increased by more than a factor of 50.
2987 
2988    Collective
2989 
2990    Input Parameters:
2991 +  comm - MPI communicator
2992 .  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2993           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2994 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
2995            This value should be the same as the local size used in creating the
2996            y vector for the matrix-vector product y = Ax.
2997 .  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
2998            This value should be the same as the local size used in creating the
2999            x vector for the matrix-vector product y = Ax.
3000 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3001 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3002 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3003            submatrix  (same for all local rows)
3004 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3005            of the in diagonal portion of the local (possibly different for each block
3006            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3007            and set it even if it is zero.
3008 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3009            submatrix (same for all local rows).
3010 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3011            off-diagonal portion of the local submatrix (possibly different for
3012            each block row) or NULL.
3013 
3014    Output Parameter:
3015 .  A - the matrix
3016 
3017    Options Database Keys:
3018 +   -mat_block_size - size of the blocks to use
3019 -   -mat_use_hash_table <fact> - set hash table factor
3020 
3021    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3022    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3023    [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
3024 
3025    Notes:
3026    If the *_nnz parameter is given then the *_nz parameter is ignored
3027 
   A nonzero block is any block that has 1 or more nonzeros in it
3029 
3030    The user MUST specify either the local or global matrix dimensions
3031    (possibly both).
3032 
3033    If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3035 
3036    Storage Information:
3037    For a square global matrix we define each processor's diagonal portion
3038    to be its local rows and the corresponding columns (a square submatrix);
3039    each processor's off-diagonal portion encompasses the remainder of the
3040    local matrix (a rectangular submatrix).
3041 
3042    The user can specify preallocated storage for the diagonal part of
3043    the local submatrix with either d_nz or d_nnz (not both).  Set
3044    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3045    memory allocation.  Likewise, specify preallocated storage for the
3046    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3047 
3048    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3049    the figure below we depict these three local rows and all columns (0-11).
3050 
3051 .vb
3052            0 1 2 3 4 5 6 7 8 9 10 11
3053           --------------------------
3054    row 3  |o o o d d d o o o o  o  o
3055    row 4  |o o o d d d o o o o  o  o
3056    row 5  |o o o d d d o o o o  o  o
3057           --------------------------
3058 .ve
3059 
3060    Thus, any entries in the d locations are stored in the d (diagonal)
3061    submatrix, and any entries in the o locations are stored in the
3062    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3063    stored simply in the `MATSEQBAIJ` format for compressed row storage.
3064 
3065    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3066    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3067    In general, for PDE problems in which most nonzeros are near the diagonal,
3068    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3069    or you will get TERRIBLE performance; see the users' manual chapter on
3070    matrices.
3071 
3072    Level: intermediate
3073 
3074 .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
3075 @*/
3076 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) {
3077   PetscMPIInt size;
3078 
3079   PetscFunctionBegin;
3080   PetscCall(MatCreate(comm, A));
3081   PetscCall(MatSetSizes(*A, m, n, M, N));
3082   PetscCallMPI(MPI_Comm_size(comm, &size));
3083   if (size > 1) {
3084     PetscCall(MatSetType(*A, MATMPIBAIJ));
3085     PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
3086   } else {
3087     PetscCall(MatSetType(*A, MATSEQBAIJ));
3088     PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
3089   }
3090   PetscFunctionReturn(0);
3091 }
3092 
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) {
  /* Duplicate an MPIBAIJ matrix: create a new matrix with the same parallel layout,
     copy the block-size bookkeeping and the communication structures, and duplicate
     the diagonal (A) and off-diagonal (B) sequential blocks. Whether numerical
     values are copied is controlled by cpvalues. */
  Mat          mat;
  Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
  PetscInt     len = 0; /* length of garray (number of off-diagonal block columns) */

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));

  /* the duplicate is immediately usable: mark it preallocated and assembled */
  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy block-size bookkeeping (bs2 = bs*bs; Mbs/Nbs etc. are counts in blocks) */
  a             = (Mat_MPIBAIJ *)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  /* share the row/column layouts by reference instead of rebuilding them */
  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-instance scratch state starts out empty in the duplicate */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;
  a->barray       = NULL;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: keep the configuration (flag, factor) but reset the table itself */
  a->ht           = NULL;
  a->hd           = NULL;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
  /* copy the global-to-local column map used for off-diagonal insertions, if built */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableCreateCopy(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
    PetscCall(PetscLogObjectMemory((PetscObject)mat, (a->Nbs) * sizeof(PetscInt)));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
#endif
  } else a->colmap = NULL;

  /* copy the local-to-global map of off-diagonal block columns, if any */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
    PetscCall(PetscMalloc1(len, &a->garray));
    PetscCall(PetscLogObjectMemory((PetscObject)mat, len * sizeof(PetscInt)));
    PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
  /* duplicate the communication structures used by matrix-vector products */
  PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->lvec));
  PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->Mvctx));

  /* duplicate the diagonal and off-diagonal sequential blocks */
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}
3173 
/* Used for both MPIBAIJ and MPISBAIJ matrices: reads a matrix stored in the PETSc
   binary (non-blocked, AIJ-style) format, computes a block-row preallocation from
   the non-blocked entries, and assembles the result into mat. */
PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) {
  PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
  PetscInt    *rowidxs, *colidxs, rs, cs, ce;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header: classid, global rows, global columns, total nonzeros */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  /* a negative nonzero count marks a special on-disk format this loader cannot handle */
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set local sizes if not set already (square matrices may mirror the other dimension) */
  if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
  if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetLocalSize(mat, &m, &n));
  PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
  mbs = m / bs; /* local number of block rows */
  nbs = n / bs; /* local number of block columns */

  /* read in row lengths and build row indices (prefix-summed into CSR row offsets) */
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  /* sanity check: row lengths over all ranks must sum to the header's nonzero count */
  PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));

  {                /* preallocate matrix storage */
    PetscBT    bt; /* helper bit set to count diagonal nonzeros */
    PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
    PetscBool  sbaij, done;
    PetscInt  *d_nnz, *o_nnz;

    PetscCall(PetscBTCreate(nbs, &bt));
    PetscCall(PetscHSetICreate(&ht));
    PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
    PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
    /* count the distinct nonzero blocks of each block row */
    for (i = 0; i < mbs; i++) {
      PetscCall(PetscBTMemzero(nbs, bt));
      PetscCall(PetscHSetIClear(ht));
      for (k = 0; k < bs; k++) {
        PetscInt row = bs * i + k;
        for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
          PetscInt col = colidxs[j];
          /* SBAIJ stores only the upper triangle, so skip entries below the diagonal */
          if (!sbaij || col >= row) {
            if (col >= cs && col < ce) {
              if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
            } else {
              PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
              if (done) o_nnz[i]++;
            }
          }
        }
      }
    }
    PetscCall(PetscBTDestroy(&bt));
    PetscCall(PetscHSetIDestroy(&ht));
    /* both calls are issued; only the one matching mat's actual type takes effect
       (each dispatches through a type-composed method, so the other is a no-op) */
    PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree2(d_nnz, o_nnz));
  }

  /* store matrix values row by row via the type's setvalues implementation */
  for (i = 0; i < m; i++) {
    PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
    PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES));
  }

  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
3274 
3275 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer) {
3276   PetscBool isbinary;
3277 
3278   PetscFunctionBegin;
3279   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3280   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3281   PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
3282   PetscFunctionReturn(0);
3283 }
3284 
3285 /*@
3286    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table
3287 
3288    Input Parameters:
3289 +  mat  - the matrix
3290 -  fact - factor
3291 
3292    Options Database Key:
3293 .  -mat_use_hash_table <fact> - provide the factor
3294 
3295    Level: advanced
3296 
3297 .seealso: `MATMPIBAIJ`, `MatSetOption()`
3298 @*/
3299 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact) {
3300   PetscFunctionBegin;
3301   PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
3302   PetscFunctionReturn(0);
3303 }
3304 
3305 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact) {
3306   Mat_MPIBAIJ *baij;
3307 
3308   PetscFunctionBegin;
3309   baij          = (Mat_MPIBAIJ *)mat->data;
3310   baij->ht_fact = fact;
3311   PetscFunctionReturn(0);
3312 }
3313 
3314 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) {
3315   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3316   PetscBool    flg;
3317 
3318   PetscFunctionBegin;
3319   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
3320   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
3321   if (Ad) *Ad = a->A;
3322   if (Ao) *Ao = a->B;
3323   if (colmap) *colmap = a->garray;
3324   PetscFunctionReturn(0);
3325 }
3326 
3327 /*
3328     Special version for direct calls from Fortran (to eliminate two function call overheads
3329 */
3330 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3331 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3332 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3333 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3334 #endif
3335 
3336 /*@C
3337   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`
3338 
3339   Collective on Mat
3340 
3341   Input Parameters:
3342 + mat - the matrix
3343 . min - number of input rows
3344 . im - input rows
3345 . nin - number of input columns
3346 . in - input columns
3347 . v - numerical values input
3348 - addvin - `INSERT_VALUES` or `ADD_VALUES`
3349 
3350   Developer Note:
3351     This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.
3352 
3353   Level: advanced
3354 
3355 .seealso: `MatSetValuesBlocked()`
3356 @*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin) {
  /* convert input arguments to C version (Fortran passes everything by reference) */
  Mat        mat = *matin;
  PetscInt   m = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray; /* bs2-sized scratch block; may be aliased into v below */
  PetscBool        roworiented = baij->roworiented;
  PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));

  /* lazily allocate the bs x bs scratch block kept on the matrix */
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* stride (in scalars) between consecutive rows/columns of one block inside v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative row index: skip entry by convention */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned block row: insert directly into the A or B sequential block */
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2; /* block is already contiguous in v */
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          /* gather the strided bs x bs block into the contiguous scratch buffer */
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2; /* rewind to the start of the scratch block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* block column in the diagonal portion */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue; /* negative column index: skip entry by convention */
        } else {
          /* block column in the off-diagonal portion */
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            /* translate the global block column to B's compacted local numbering */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
#else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &col));
            col = (col - 1) / bs;
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            /* column not in B's nonzero pattern: disassemble so new entries can be added */
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j]; /* after disassembly B uses global block-column numbering */
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      /* off-process block row: stash the values for communication during assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
  PetscFunctionReturn(0);
}
3463 
3464 /*@
3465      MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block
3466          CSR format the local rows.
3467 
3468    Collective
3469 
3470    Input Parameters:
3471 +  comm - MPI communicator
3472 .  bs - the block size, only a block size of 1 is supported
3473 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
3474 .  n - This value should be the same as the local size used in creating the
3475        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
3476        calculated if N is given) For square matrices n is almost always m.
3477 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3478 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3479 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
3480 .   j - column indices
3481 -   a - matrix values
3482 
3483    Output Parameter:
3484 .   mat - the matrix
3485 
3486    Level: intermediate
3487 
3488    Notes:
3489        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3490      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3491      called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3492 
3493      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3494      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3495      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3496      with column-major ordering within blocks.
3497 
3498        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3499 
3500 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
3501           `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
3502 @*/
3503 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) {
3504   PetscFunctionBegin;
3505   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3506   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
3507   PetscCall(MatCreate(comm, mat));
3508   PetscCall(MatSetSizes(*mat, m, n, M, N));
3509   PetscCall(MatSetType(*mat, MATMPIBAIJ));
3510   PetscCall(MatSetBlockSize(*mat, bs));
3511   PetscCall(MatSetUp(*mat));
3512   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
3513   PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
3514   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
3515   PetscFunctionReturn(0);
3516 }
3517 
3518 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) {
3519   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
3520   PetscInt    *indx;
3521   PetscScalar *values;
3522 
3523   PetscFunctionBegin;
3524   PetscCall(MatGetSize(inmat, &m, &N));
3525   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3526     Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data;
3527     PetscInt    *dnz, *onz, mbs, Nbs, nbs;
3528     PetscInt    *bindx, rmax = a->rmax, j;
3529     PetscMPIInt  rank, size;
3530 
3531     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3532     mbs = m / bs;
3533     Nbs = N / cbs;
3534     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
3535     nbs = n / cbs;
3536 
3537     PetscCall(PetscMalloc1(rmax, &bindx));
3538     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
3539 
3540     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3541     PetscCallMPI(MPI_Comm_rank(comm, &size));
3542     if (rank == size - 1) {
3543       /* Check sum(nbs) = Nbs */
3544       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
3545     }
3546 
3547     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
3548     for (i = 0; i < mbs; i++) {
3549       PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
3550       nnz = nnz / bs;
3551       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
3552       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
3553       PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL));
3554     }
3555     PetscCall(PetscFree(bindx));
3556 
3557     PetscCall(MatCreate(comm, outmat));
3558     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
3559     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
3560     PetscCall(MatSetType(*outmat, MATBAIJ));
3561     PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz));
3562     PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
3563     MatPreallocateEnd(dnz, onz);
3564     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3565   }
3566 
3567   /* numeric phase */
3568   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3569   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
3570 
3571   for (i = 0; i < m; i++) {
3572     PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3573     Ii = i + rstart;
3574     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
3575     PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3576   }
3577   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
3578   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
3579   PetscFunctionReturn(0);
3580 }
3581