xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 43cdf1ebbcbfa05bee08e48007ef1bae3f20f4e9)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I  "petscmat.h"  I*/
2 
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 #if defined(PETSC_HAVE_HYPRE)
8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
9 #endif
10 
11 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[])
12 {
13   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
14   PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
15   PetscScalar       *va, *vv;
16   Vec                vB, vA;
17   const PetscScalar *vb;
18 
19   PetscFunctionBegin;
20   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
21   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
22 
23   PetscCall(VecGetArrayWrite(vA, &va));
24   if (idx) {
25     for (i = 0; i < m; i++) {
26       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
27     }
28   }
29 
30   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
31   PetscCall(PetscMalloc1(m, &idxb));
32   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
33 
34   PetscCall(VecGetArrayWrite(v, &vv));
35   PetscCall(VecGetArrayRead(vB, &vb));
36   for (i = 0; i < m; i++) {
37     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
38       vv[i] = vb[i];
39       if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
40     } else {
41       vv[i] = va[i];
42       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
43     }
44   }
45   PetscCall(VecRestoreArrayWrite(vA, &vv));
46   PetscCall(VecRestoreArrayWrite(vA, &va));
47   PetscCall(VecRestoreArrayRead(vB, &vb));
48   PetscCall(PetscFree(idxb));
49   PetscCall(VecDestroy(&vA));
50   PetscCall(VecDestroy(&vB));
51   PetscFunctionReturn(0);
52 }
53 
54 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
55 {
56   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
57 
58   PetscFunctionBegin;
59   PetscCall(MatStoreValues(aij->A));
60   PetscCall(MatStoreValues(aij->B));
61   PetscFunctionReturn(0);
62 }
63 
64 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
65 {
66   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
67 
68   PetscFunctionBegin;
69   PetscCall(MatRetrieveValues(aij->A));
70   PetscCall(MatRetrieveValues(aij->B));
71   PetscFunctionReturn(0);
72 }
73 
74 /*
75      Local utility routine that creates a mapping from the global column
76    number to the local number in the off-diagonal part of the local
77    storage of the matrix.  This is done in a non scalable way since the
78    length of colmap equals the global matrix length.
79 */
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     nbs = B->nbs, i, bs = mat->rmap->bs; /* nbs = number of block columns present in B */

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* hash-table colmap: key = global block column + 1, value = bs*(local block column) + 1;
     the +1 offsets let 0 mean "column not present" */
  /* NOTE(review): the size hint uses baij->nbs while the loop bound is B->nbs — confirm these agree */
  PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap));
  for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1));
#else
  /* dense colmap of length Nbs (global block columns); entry 0 means "column not present" */
  PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
  for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
#endif
  PetscFunctionReturn(0);
}
96 
/*
  Insert a single scalar `value` at local scalar indices (row,col) into the
  diagonal block A of the MPIBAIJ matrix.  Uses a binary search (down to a
  window of 3) followed by a linear scan over the block row, and reallocates
  the row via MatSeqXAIJReallocateAIJ() when a new nonzero block is needed.
  Relies on macro-local variables declared by the caller (see
  MatSetValues_MPIBAIJ): rp, ap, bap, rmax, nrow, low, high, t, _i, N, brow,
  bcol, ridx, cidx plus the A-block aliases aa, ai, aj, aimax, ailen, a, bs,
  bs2.  orow/ocol are the original global indices, used only in error messages.
*/
#define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    brow = row / bs; \
    rp   = aj + ai[brow]; \
    ap   = aa + bs2 * ai[brow]; \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  }
136 
/*
  Same as MatSetValues_SeqBAIJ_A_Private() but targets the off-diagonal block
  B of the MPIBAIJ matrix; uses the B-block aliases ba, bi, bj, bimax, bilen,
  b declared by the caller.  orow/ocol are the original global indices, used
  only in error messages.
*/
#define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    brow = row / bs; \
    rp   = bj + bi[brow]; \
    ap   = ba + bs2 * bi[brow]; \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column  (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  }
176 
/*
  MatSetValues_MPIBAIJ - Insert or add an m x n dense array of scalar values
  at global scalar indices im[]/in[].  Locally-owned rows are routed to the
  diagonal (A) or off-diagonal (B) block via the insertion macros above;
  off-process rows are buffered in the stash for communication at assembly.
  Negative row/column indices are silently ignored.
*/
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  MatScalar    value;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)(A)->data;
  PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar   *aa = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are ignored by convention */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      /* locally owned row */
      row = im[i] - rstart_orig;
      for (j = 0; j < n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          /* column falls in the diagonal block */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue; /* negative columns are ignored by convention */
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            /* after a prior assembly B's columns are compressed; translate
               global column -> local B column via the colmap */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
            col = col - 1; /* colmap stores bs*(local block)+1; 0 means "absent" */
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
              /* unknown column and new nonzeros allowed: expand B back to
                 global column numbering */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)(B)->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else {
              PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
              col += in[j] % bs; /* add scalar offset within the block */
            }
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else {
      /* off-process row: buffer for communication during assembly */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
262 
/*
  Insert or add one bs x bs block of values v[] at block indices (row,col) of
  a SeqBAIJ matrix.  Storage inside each block is column-major; `roworiented`
  controls how v[] is read.  Reallocates the row when a new nonzero block is
  created (unless a->nonew forbids it).  orow/ocol are the original global
  block indices, used only in error messages.
*/
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];       /* block column indices of this row */
  ap    = aa + bs2 * ai[row]; /* values of this row */
  rmax  = imax[row];          /* allocated blocks in this row */
  nrow  = ailen[row];         /* used blocks in this row */
  value = v;
  low   = 0;
  high  = nrow;
  /* binary search down to a window of 7, then linear scan */
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* block already exists; write into it */
      bap = ap + bs2 * i;
      if (roworiented) {
        /* v is row-major; blocks are stored column-major, so transpose on copy */
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        /* v is column-major, matching block storage; straight copy */
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2; /* silently drop new nonzeros */
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
339 
340 /*
341     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
342     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
343 */
/*
  MatSetValuesBlocked_MPIBAIJ - Insert or add an m x n array of bs x bs blocks
  at global block indices im[]/in[].  Each block is repacked (when necessary)
  into the scratch buffer baij->barray before being handed to
  MatSetValuesBlocked_SeqBAIJ_Inlined() for the diagonal (A) or off-diagonal
  (B) part; off-process block rows are buffered in the block stash.
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ *)mat->data;
  const PetscScalar *value;
  MatScalar         *barray      = baij->barray;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the bs2-sized scratch block used for repacking */
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* stride (in scalars) between consecutive rows of one block inside v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative block rows are ignored by convention */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2; /* alias directly into v */
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2; /* alias directly into v */
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i * (stepval + bs) + j) * bs;
          } else {
            value = v + (j * (stepval + bs) + i) * bs;
          }
          /* gather the strided bs x bs block into contiguous barray */
          for (ii = 0; ii < bs; ii++, value += bs + stepval) {
            for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2; /* rewind to the start of the scratch block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* block column falls in the diagonal part */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue; /* negative block columns are ignored by convention */
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            /* translate global block column -> local B block column via colmap */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
  #endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = (col - 1) / bs; /* colmap stores scalar offsets; convert to block */
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
              /* unknown column and new nonzeros allowed: expand B back to
                 global numbering (the Inlined call below re-reads baij->B) */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      /* off-process block row: buffer for communication during assembly */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
435 
/* Multiplicative (Fibonacci) hashing: HASH_KEY is the fractional part of the
   golden ratio; HASH maps `key` into [0, size) using the fractional part of
   key*HASH_KEY.  `tmp` must be a PetscReal lvalue supplied by the caller. */
#define HASH_KEY             0.6180339887
#define HASH(size, key, tmp) (tmp = (key)*HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
440 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
441 {
442   Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
443   PetscBool    roworiented = baij->roworiented;
444   PetscInt     i, j, row, col;
445   PetscInt     rstart_orig = mat->rmap->rstart;
446   PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
447   PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
448   PetscReal    tmp;
449   MatScalar  **HD       = baij->hd, value;
450   PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;
451 
452   PetscFunctionBegin;
453   for (i = 0; i < m; i++) {
454     if (PetscDefined(USE_DEBUG)) {
455       PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
456       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
457     }
458     row = im[i];
459     if (row >= rstart_orig && row < rend_orig) {
460       for (j = 0; j < n; j++) {
461         col = in[j];
462         if (roworiented) value = v[i * n + j];
463         else value = v[i + j * m];
464         /* Look up PetscInto the Hash Table */
465         key = (row / bs) * Nbs + (col / bs) + 1;
466         h1  = HASH(size, key, tmp);
467 
468         idx = h1;
469         if (PetscDefined(USE_DEBUG)) {
470           insert_ct++;
471           total_ct++;
472           if (HT[idx] != key) {
473             for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
474               ;
475             if (idx == size) {
476               for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
477                 ;
478               PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
479             }
480           }
481         } else if (HT[idx] != key) {
482           for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
483             ;
484           if (idx == size) {
485             for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
486               ;
487             PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
488           }
489         }
490         /* A HASH table entry is found, so insert the values at the correct address */
491         if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
492         else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
493       }
494     } else if (!baij->donotstash) {
495       if (roworiented) {
496         PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
497       } else {
498         PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
499       }
500     }
501   }
502   if (PetscDefined(USE_DEBUG)) {
503     baij->ht_total_ct += total_ct;
504     baij->ht_insert_ct += insert_ct;
505   }
506   PetscFunctionReturn(0);
507 }
508 
509 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
510 {
511   Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
512   PetscBool          roworiented = baij->roworiented;
513   PetscInt           i, j, ii, jj, row, col;
514   PetscInt           rstart = baij->rstartbs;
515   PetscInt           rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
516   PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
517   PetscReal          tmp;
518   MatScalar        **HD = baij->hd, *baij_a;
519   const PetscScalar *v_t, *value;
520   PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;
521 
522   PetscFunctionBegin;
523   if (roworiented) stepval = (n - 1) * bs;
524   else stepval = (m - 1) * bs;
525 
526   for (i = 0; i < m; i++) {
527     if (PetscDefined(USE_DEBUG)) {
528       PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
529       PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
530     }
531     row = im[i];
532     v_t = v + i * nbs2;
533     if (row >= rstart && row < rend) {
534       for (j = 0; j < n; j++) {
535         col = in[j];
536 
537         /* Look up into the Hash Table */
538         key = row * Nbs + col + 1;
539         h1  = HASH(size, key, tmp);
540 
541         idx = h1;
542         if (PetscDefined(USE_DEBUG)) {
543           total_ct++;
544           insert_ct++;
545           if (HT[idx] != key) {
546             for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
547               ;
548             if (idx == size) {
549               for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
550                 ;
551               PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
552             }
553           }
554         } else if (HT[idx] != key) {
555           for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
556             ;
557           if (idx == size) {
558             for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
559               ;
560             PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
561           }
562         }
563         baij_a = HD[idx];
564         if (roworiented) {
565           /*value = v + i*(stepval+bs)*bs + j*bs;*/
566           /* value = v + (i*(stepval+bs)+j)*bs; */
567           value = v_t;
568           v_t += bs;
569           if (addv == ADD_VALUES) {
570             for (ii = 0; ii < bs; ii++, value += stepval) {
571               for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
572             }
573           } else {
574             for (ii = 0; ii < bs; ii++, value += stepval) {
575               for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
576             }
577           }
578         } else {
579           value = v + j * (stepval + bs) * bs + i * bs;
580           if (addv == ADD_VALUES) {
581             for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
582               for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
583             }
584           } else {
585             for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
586               for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
587             }
588           }
589         }
590       }
591     } else {
592       if (!baij->donotstash) {
593         if (roworiented) {
594           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
595         } else {
596           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
597         }
598       }
599     }
600   }
601   if (PetscDefined(USE_DEBUG)) {
602     baij->ht_total_ct += total_ct;
603     baij->ht_insert_ct += insert_ct;
604   }
605   PetscFunctionReturn(0);
606 }
607 
/*
  MatGetValues_MPIBAIJ - Retrieve an m x n dense array of values at global
  scalar indices idxm[]/idxn[] into v (row-major).  Only locally-owned rows
  are supported; columns outside the local column range are looked up in the
  off-diagonal block via the colmap, and entries not present return 0.0.
  Negative indices are skipped (their v entries are left untouched).
*/
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          /* column in the diagonal block */
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          /* column in the off-diagonal block: translate via colmap */
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
          data--; /* colmap stores bs*(local block)+1; 0 means "absent" */
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          /* data < 0 or a garray mismatch means the block is not stored here */
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
645 
/*
  MatNorm_MPIBAIJ - Compute a matrix norm of an MPIBAIJ matrix.
  Supports NORM_FROBENIUS (sum of squares over both blocks, allreduced),
  NORM_1 (max column sum; per-column sums allreduced over all ranks) and
  NORM_INFINITY (max row sum; local max allreduced with MPI max).
  On a single rank the computation is delegated to the SeqBAIJ block.
*/
PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
  PetscReal    sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    PetscCall(MatNorm(baij->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      /* accumulate |a_ij|^2 over every stored scalar entry of both blocks */
      v  = amat->a;
      nz = amat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v  = bmat->a;
      nz = bmat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp, *tmp2;
      /* NOTE(review): cstart is taken from rstartbs (row block start) — this only
         matches the diagonal block's column start when rows and columns are
         partitioned identically; confirm for non-square layouts */
      PetscInt  *jj, *garray = baij->garray, cstart = baij->rstartbs;
      PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N, &tmp2));
      /* column sums of the diagonal block (blocks are column-major) */
      v  = amat->a;
      jj = amat->j;
      for (i = 0; i < amat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * (cstart + *jj) + j; /* column index */
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      /* column sums of the off-diagonal block, mapped through garray */
      v  = bmat->a;
      jj = bmat->j;
      for (i = 0; i < bmat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * garray[*jj] + j;
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = 0.0;
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      PetscCall(PetscMalloc1(bs, &sums));
      sum = 0.0;
      /* one pass per block row; sums[] accumulates the bs scalar rows */
      for (j = 0; j < amat->mbs; j++) {
        for (row = 0; row < bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2 * amat->i[j];
        nz = amat->i[j + 1] - amat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        v  = bmat->a + bs2 * bmat->i[j];
        nz = bmat->i[j + 1] - bmat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        for (row = 0; row < bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscFree(sums));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
745 
746 /*
747   Creates the hash table, and sets the table
748   This table is created only once.
  If new entries need to be added to the matrix
750   then the hash table has to be destroyed and
751   recreated.
752 */
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          A = baij->A, B = baij->B;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
  PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
  PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
  PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
  PetscInt    *HT, key;
  MatScalar  **HD;
  PetscReal    tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct = 0, max = 0; /* ct: total collisions seen; max: longest probe sequence */
#endif

  PetscFunctionBegin;
  /* the table is built only once; later calls are no-ops */
  if (baij->ht) PetscFunctionReturn(0);

  /* table size is factor times the local number of nonzero blocks */
  baij->ht_size = (PetscInt)(factor * nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
  HD = baij->hd; /* HD[slot]: pointer to the bs2 values of the block keyed in HT[slot] */
  HT = baij->ht; /* HT[slot]: key = row*Nbs + col + 1; 0 marks an empty slot (calloc) */

  /* Loop Over A */
  for (i = 0; i < a->mbs; i++) {
    for (j = ai[i]; j < ai[i + 1]; j++) {
      row = i + rstart;     /* global block row */
      col = aj[j] + cstart; /* global block column of the diagonal part */

      key = row * Nbs + col + 1; /* +1 keeps keys nonzero, since 0 means "empty" */
      h1  = HASH(ht_size, key, tmp);
      /* linear probing from h1 until a free slot is found */
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = a->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i = 0; i < b->mbs; i++) {
    for (j = bi[i]; j < bi[i + 1]; j++) {
      row = i + rstart;
      col = garray[bj[j]]; /* garray maps local off-diagonal block columns to global ones */
      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      /* same linear probing as for A above */
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = b->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i = 0, j = 0; i < ht_size; i++) {
    if (HT[i]) j++;
  }
  PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max));
#endif
  PetscFunctionReturn(0);
}
836 
837 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
838 {
839   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
840   PetscInt     nstash, reallocs;
841 
842   PetscFunctionBegin;
843   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
844 
845   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
846   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
847   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
848   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
849   PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
850   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
851   PetscFunctionReturn(0);
852 }
853 
/* Completes assembly: drains both stashes into the local blocks, handles disassembly
   agreement across ranks, and optionally builds the MatSetValues hash table. */
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
  PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt    *row, *col;
  PetscBool    r1, r2, r3, other_disassembled;
  MatScalar   *val;
  PetscMPIInt  n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* drain the point stash: receive messages until none remain */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ *)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* each stashed block carries bs2 scalar values, hence the i*bs2 offset */
        PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

#if defined(PETSC_USE_INFO)
  /* report and reset the hash-table search statistics accumulated during MatSetValues */
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* on final assembly, build the hash table (if requested and not yet built) and
     switch the set-values kernels to the hash-table versions */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* release MatGetRow() scratch space; it is reallocated on demand */
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(0);
}
962 
963 extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer);
964 #include <petscdraw.h>
/* Views an MPIBAIJ matrix with ASCII, draw, or socket viewers; for the full view the
   entire matrix is gathered into a temporary matrix owned by rank 0. */
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ      *baij = (Mat_MPIBAIJ *)mat->data;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    /* the INFO formats print summaries only; no gathered matrix is needed */
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, (double)info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqBAIJ *Aloc;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      /* rvals holds the bs consecutive global point-row indices of block row i */
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          /* insert one column of the bs x bs block at a time */
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        /* garray maps local off-diagonal block columns to global block columns */
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      /* rank 0 now holds the entire matrix in its local diagonal part; view that */
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)(A->data))->A, matname));
      PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}
1086 
1087 /* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
  Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt        header[4], M, N, m, rs, cs, bs, nz, cnt, i, j, ja, jb, k, l;
  PetscInt       *rowlens, *colidxs;
  PetscScalar    *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;              /* global number of rows */
  N  = mat->cmap->N;              /* global number of columns */
  m  = mat->rmap->n;              /* local number of rows */
  rs = mat->rmap->rstart;         /* first locally owned row */
  cs = mat->cmap->rstart;         /* first column of the diagonal block */
  bs = mat->rmap->bs;             /* block size */
  nz = bs * bs * (A->nz + B->nz); /* local number of scalar nonzeros */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  header[3] = nz;
  /* on rank 0 header[3] becomes the global nonzero count */
  PetscCallMPI(MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths; every point row of a block row has the same length */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (cnt = 0, i = 0; i < A->mbs; i++)
    for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices; within each point row the off-diagonal (B)
     columns left of the diagonal block are emitted first, then the A columns,
     then the remaining B columns, keeping the indices ascending */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values in exactly the same order as the column indices */
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
    }
  }
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(0);
}
1163 
1164 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
1165 {
1166   PetscBool iascii, isdraw, issocket, isbinary;
1167 
1168   PetscFunctionBegin;
1169   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1170   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1171   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1172   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1173   if (iascii || isdraw || issocket) {
1174     PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
1175   } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
1176   PetscFunctionReturn(0);
1177 }
1178 
/* Frees all storage owned by an MPIBAIJ matrix and clears its composed methods */
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
#endif
  /* communication stashes used during assembly */
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  /* local diagonal and off-diagonal blocks */
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  /* scatter machinery for matrix-vector products */
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  /* MatGetRow() scratch space */
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  /* hash table allocated by MatCreateHashTable_MPIBAIJ_Private() */
  PetscCall(PetscFree2(baij->hd, baij->ht));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  /* clear the type name and all composed methods so the object can be re-typed */
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
  PetscFunctionReturn(0);
}
1221 
1222 PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
1223 {
1224   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1225   PetscInt     nt;
1226 
1227   PetscFunctionBegin;
1228   PetscCall(VecGetLocalSize(xx, &nt));
1229   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
1230   PetscCall(VecGetLocalSize(yy, &nt));
1231   PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible parition of A and yy");
1232   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1233   PetscCall((*a->A->ops->mult)(a->A, xx, yy));
1234   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1235   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
1236   PetscFunctionReturn(0);
1237 }
1238 
1239 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1240 {
1241   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1242 
1243   PetscFunctionBegin;
1244   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1245   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1246   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1247   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1248   PetscFunctionReturn(0);
1249 }
1250 
1251 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
1252 {
1253   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1254 
1255   PetscFunctionBegin;
1256   /* do nondiagonal part */
1257   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1258   /* do local part */
1259   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1260   /* add partial results together */
1261   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1262   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1263   PetscFunctionReturn(0);
1264 }
1265 
1266 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1267 {
1268   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1269 
1270   PetscFunctionBegin;
1271   /* do nondiagonal part */
1272   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1273   /* do local part */
1274   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1275   /* add partial results together */
1276   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1277   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1278   PetscFunctionReturn(0);
1279 }
1280 
1281 /*
1282   This only works correctly for square matrices where the subblock A->A is the
1283    diagonal block
1284 */
1285 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
1286 {
1287   PetscFunctionBegin;
1288   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1289   PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
1290   PetscFunctionReturn(0);
1291 }
1292 
1293 PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
1294 {
1295   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1296 
1297   PetscFunctionBegin;
1298   PetscCall(MatScale(a->A, aa));
1299   PetscCall(MatScale(a->B, aa));
1300   PetscFunctionReturn(0);
1301 }
1302 
/* Returns one locally owned point row, merging the A (diagonal) and B (off-diagonal)
   portions so that global column indices come out in increasing order. */
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;

  PetscFunctionBegin;
  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt     max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* total blocks in block row i */
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }
  lrow = row - brstart; /* local point-row index */

  /* only request the work arrays the caller actually asked for */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1; /* number of B entries whose global column precedes the diagonal block */
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries left of the diagonal block come first */
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          /* imark was not computed above (v == NULL), so determine it here */
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(0);
}
1385 
1386 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1387 {
1388   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1389 
1390   PetscFunctionBegin;
1391   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
1392   baij->getrowactive = PETSC_FALSE;
1393   PetscFunctionReturn(0);
1394 }
1395 
1396 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1397 {
1398   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1399 
1400   PetscFunctionBegin;
1401   PetscCall(MatZeroEntries(l->A));
1402   PetscCall(MatZeroEntries(l->B));
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1407 {
1408   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
1409   Mat            A = a->A, B = a->B;
1410   PetscLogDouble isend[5], irecv[5];
1411 
1412   PetscFunctionBegin;
1413   info->block_size = (PetscReal)matin->rmap->bs;
1414 
1415   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1416 
1417   isend[0] = info->nz_used;
1418   isend[1] = info->nz_allocated;
1419   isend[2] = info->nz_unneeded;
1420   isend[3] = info->memory;
1421   isend[4] = info->mallocs;
1422 
1423   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1424 
1425   isend[0] += info->nz_used;
1426   isend[1] += info->nz_allocated;
1427   isend[2] += info->nz_unneeded;
1428   isend[3] += info->memory;
1429   isend[4] += info->mallocs;
1430 
1431   if (flag == MAT_LOCAL) {
1432     info->nz_used      = isend[0];
1433     info->nz_allocated = isend[1];
1434     info->nz_unneeded  = isend[2];
1435     info->memory       = isend[3];
1436     info->mallocs      = isend[4];
1437   } else if (flag == MAT_GLOBAL_MAX) {
1438     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1439 
1440     info->nz_used      = irecv[0];
1441     info->nz_allocated = irecv[1];
1442     info->nz_unneeded  = irecv[2];
1443     info->memory       = irecv[3];
1444     info->mallocs      = irecv[4];
1445   } else if (flag == MAT_GLOBAL_SUM) {
1446     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1447 
1448     info->nz_used      = irecv[0];
1449     info->nz_allocated = irecv[1];
1450     info->nz_unneeded  = irecv[2];
1451     info->memory       = irecv[3];
1452     info->mallocs      = irecv[4];
1453   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1454   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1455   info->fill_ratio_needed = 0;
1456   info->factor_mallocs    = 0;
1457   PetscFunctionReturn(0);
1458 }
1459 
/* Applies a matrix option, forwarding to the sequential blocks where appropriate */
PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* options forwarded unchanged to both sequential blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg; /* also recorded on the parallel object for its own MatSetValues path */

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    a->ht_fact = 1.39; /* default table-size factor used by MatCreateHashTable_MPIBAIJ_Private() */
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(0);
}
1507 
/* Forms the transpose by inserting each local bs x bs block, one block column at a
   time, into the transposed (col,row) position of the result matrix. */
PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *Aloc;
  Mat          B;
  PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
  PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
  MatScalar   *a;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* create the result with the transposed (column) layout */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    /* Do not know preallocation information, but must set block size */
    PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ *)baij->A->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  PetscCall(PetscMalloc1(bs, &rvals));

  for (i = 0; i < mbs; i++) {
    /* rvals holds the bs consecutive global point-row indices of block row i */
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      col = (baij->cstartbs + aj[j]) * bs;
      for (k = 0; k < bs; k++) {
        /* insert each column of the block as a row of B, i.e. transposed */
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));

        col++;
        a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ *)baij->B->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      col = baij->garray[aj[j]] * bs; /* global point column via garray */
      for (k = 0; k < bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
        col++;
        a += bs;
      }
    }
  }
  PetscCall(PetscFree(rvals));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  /* for MAT_INPLACE_MATRIX the assembled transpose replaces A itself */
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else PetscCall(MatHeaderMerge(A, &B));
  PetscFunctionReturn(0);
}
1574 
1575 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr)
1576 {
1577   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1578   Mat          a = baij->A, b = baij->B;
1579   PetscInt     s1, s2, s3;
1580 
1581   PetscFunctionBegin;
1582   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1583   if (rr) {
1584     PetscCall(VecGetLocalSize(rr, &s1));
1585     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1586     /* Overlap communication with computation. */
1587     PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1588   }
1589   if (ll) {
1590     PetscCall(VecGetLocalSize(ll, &s1));
1591     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1592     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1593   }
1594   /* scale  the diagonal block */
1595   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1596 
1597   if (rr) {
1598     /* Do a scatter end and then right scale the off-diagonal block */
1599     PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1600     PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1601   }
1602   PetscFunctionReturn(0);
1603 }
1604 
/*
  Zeros the (globally numbered) rows listed in rows[], optionally putting diag on
  the diagonal. When both x and b are given, b is adjusted in those rows so the
  solution keeps x's values there: b[row] = diag * x[row].
*/
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
  PetscInt    *lrows;
  PetscInt     r, len;
  PetscBool    cong;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  if ((diag != 0.0) && cong) {
    /* congruent row/column layouts: the SeqBAIJ routine can set the diagonal entries directly */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
  } else if (diag != 0.0) {
    /* non-congruent layouts: zero first, then insert diagonal entries one by one */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
    PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
       MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  } else {
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}
1662 
/*
  Zeros both the rows and the columns listed (globally) in rows[], placing diag
  on the diagonal of the zeroed rows, and (when x and b are given) subtracting
  the eliminated column contributions from the right-hand side b.

  Outline:
    1. An SF reduction marks which locally owned rows were requested (possibly
       by other processes).
    2. The diagonal block is handled by MatZeroRowsColumns() on l->A.
    3. For the off-diagonal block, a 0/1 mask vector is scattered to ghost
       positions so each process knows which ghost columns are being eliminated.
*/
PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n, p = 0;
  PetscInt           i, j, k, r, len = 0, row, col, count;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask;
  Vec                xmask, lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1; /* -1 = local row not requested (yet) */
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off diagonal part of matrix */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1; /* mark eliminated rows; scatter makes the mask visible as ghost values */
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x) {
    /* ghost values of x are needed to fix the right-hand side below */
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off diagonal matrix */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; /* number of stored values in this point row */
    aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
    for (k = 0; k < count; ++k) {
      /* the values of one point row are strided bs apart within the bs2-sized blocks */
      aa[0] = 0.0;
      aa += bs;
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i / bs;
    for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs * baij->j[j] + k; /* local (ghost) point column */
        if (PetscAbsScalar(mask[col])) {
          aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k;
          if (x) bb[i] -= aa[0] * xx[col]; /* move the known column contribution to the rhs */
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}
1758 
1759 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1760 {
1761   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1762 
1763   PetscFunctionBegin;
1764   PetscCall(MatSetUnfactored(a->A));
1765   PetscFunctionReturn(0);
1766 }
1767 
1768 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);
1769 
1770 PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag)
1771 {
1772   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
1773   Mat          a, b, c, d;
1774   PetscBool    flg;
1775 
1776   PetscFunctionBegin;
1777   a = matA->A;
1778   b = matA->B;
1779   c = matB->A;
1780   d = matB->B;
1781 
1782   PetscCall(MatEqual(a, c, &flg));
1783   if (flg) PetscCall(MatEqual(b, d, &flg));
1784   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1785   PetscFunctionReturn(0);
1786 }
1787 
1788 PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str)
1789 {
1790   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1791   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
1792 
1793   PetscFunctionBegin;
1794   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1795   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1796     PetscCall(MatCopy_Basic(A, B, str));
1797   } else {
1798     PetscCall(MatCopy(a->A, b->A, str));
1799     PetscCall(MatCopy(a->B, b->B, str));
1800   }
1801   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1802   PetscFunctionReturn(0);
1803 }
1804 
/* Default MatSetUp(): preallocate with default row lengths using the block size already stored in A->rmap. */
PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatMPIBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL));
  PetscFunctionReturn(0);
}
1811 
1812 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
1813 {
1814   PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs;
1815   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
1816   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
1817 
1818   PetscFunctionBegin;
1819   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
1820   PetscFunctionReturn(0);
1821 }
1822 
1823 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
1824 {
1825   Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
1826   PetscBLASInt bnz, one                         = 1;
1827   Mat_SeqBAIJ *x, *y;
1828   PetscInt     bs2 = Y->rmap->bs * Y->rmap->bs;
1829 
1830   PetscFunctionBegin;
1831   if (str == SAME_NONZERO_PATTERN) {
1832     PetscScalar alpha = a;
1833     x                 = (Mat_SeqBAIJ *)xx->A->data;
1834     y                 = (Mat_SeqBAIJ *)yy->A->data;
1835     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
1836     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
1837     x = (Mat_SeqBAIJ *)xx->B->data;
1838     y = (Mat_SeqBAIJ *)yy->B->data;
1839     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
1840     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
1841     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1842   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1843     PetscCall(MatAXPY_Basic(Y, a, X, str));
1844   } else {
1845     Mat       B;
1846     PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
1847     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
1848     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
1849     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
1850     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
1851     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
1852     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
1853     PetscCall(MatSetType(B, MATMPIBAIJ));
1854     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d));
1855     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
1856     PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
1857     /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
1858     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
1859     PetscCall(MatHeaderMerge(Y, &B));
1860     PetscCall(PetscFree(nnz_d));
1861     PetscCall(PetscFree(nnz_o));
1862   }
1863   PetscFunctionReturn(0);
1864 }
1865 
1866 PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
1867 {
1868   PetscFunctionBegin;
1869   if (PetscDefined(USE_COMPLEX)) {
1870     Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;
1871 
1872     PetscCall(MatConjugate_SeqBAIJ(a->A));
1873     PetscCall(MatConjugate_SeqBAIJ(a->B));
1874   }
1875   PetscFunctionReturn(0);
1876 }
1877 
1878 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1879 {
1880   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1881 
1882   PetscFunctionBegin;
1883   PetscCall(MatRealPart(a->A));
1884   PetscCall(MatRealPart(a->B));
1885   PetscFunctionReturn(0);
1886 }
1887 
1888 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1889 {
1890   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1891 
1892   PetscFunctionBegin;
1893   PetscCall(MatImaginaryPart(a->A));
1894   PetscCall(MatImaginaryPart(a->B));
1895   PetscFunctionReturn(0);
1896 }
1897 
1898 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1899 {
1900   IS       iscol_local;
1901   PetscInt csize;
1902 
1903   PetscFunctionBegin;
1904   PetscCall(ISGetLocalSize(iscol, &csize));
1905   if (call == MAT_REUSE_MATRIX) {
1906     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1907     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1908   } else {
1909     PetscCall(ISAllGather(iscol, &iscol_local));
1910   }
1911   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat));
1912   if (call == MAT_INITIAL_MATRIX) {
1913     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1914     PetscCall(ISDestroy(&iscol_local));
1915   }
1916   PetscFunctionReturn(0);
1917 }
1918 
/*
  Not great since it makes two copies of the submatrix: first a local SeqBAIJ
  matrix, and then the final result obtained by concatenating the local matrices.
  Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
  This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
*/
/*
  Builds the parallel submatrix mat(isrow, iscol): first extracts a sequential
  copy of the selected rows/columns on each process, then determines the new
  column ownership, preallocates, and inserts the local pieces block-row by
  block-row. csize is the requested local column size of the result (or
  PETSC_DECIDE).
*/
PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt  rank, size;
  PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
  PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat          M, Mreuse;
  MatScalar   *vwork, *aa;
  MPI_Comm     comm;
  IS           isrow_new, iscol_new;
  Mat_SeqBAIJ *aij;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* The compression and expansion should be avoided. Doesn't point
     out errors, might change the indices, hence buggy */
  PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
  PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));

  if (call == MAT_REUSE_MATRIX) {
    /* the sequential copy from the initial call was stashed on *newmat; refill it */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse));
  }
  PetscCall(ISDestroy(&isrow_new));
  PetscCall(ISDestroy(&iscol_new));
  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetSize(Mreuse, &m, &n));
  /* convert point sizes to block sizes */
  m = m / bs;
  n = n / bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n * bs) { /* square matrix */
        nlocal = m;
      } else {
        /* spread the columns as evenly as possible over the processes */
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize / bs;
    }
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    /* this routine serves both BAIJ and SBAIJ; presumably the preallocation
       call that does not match M's actual type is a no-op -- TODO confirm */
    PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(PetscFree2(dlens, olens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqBAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i = 0; i < m; i++) {
    row   = rstart / bs + i; /* global block row in the new matrix */
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj += nz;
    vwork = aa;
    aa += nz * bs * bs;
    PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(PetscObjectDereference((PetscObject)Mreuse));
  }
  PetscFunctionReturn(0);
}
2050 
2051 PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B)
2052 {
2053   MPI_Comm        comm, pcomm;
2054   PetscInt        clocal_size, nrows;
2055   const PetscInt *rows;
2056   PetscMPIInt     size;
2057   IS              crowp, lcolp;
2058 
2059   PetscFunctionBegin;
2060   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2061   /* make a collective version of 'rowp' */
2062   PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
2063   if (pcomm == comm) {
2064     crowp = rowp;
2065   } else {
2066     PetscCall(ISGetSize(rowp, &nrows));
2067     PetscCall(ISGetIndices(rowp, &rows));
2068     PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
2069     PetscCall(ISRestoreIndices(rowp, &rows));
2070   }
2071   PetscCall(ISSetPermutation(crowp));
2072   /* make a local version of 'colp' */
2073   PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
2074   PetscCallMPI(MPI_Comm_size(pcomm, &size));
2075   if (size == 1) {
2076     lcolp = colp;
2077   } else {
2078     PetscCall(ISAllGather(colp, &lcolp));
2079   }
2080   PetscCall(ISSetPermutation(lcolp));
2081   /* now we just get the submatrix */
2082   PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
2083   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B));
2084   /* clean up */
2085   if (pcomm != comm) PetscCall(ISDestroy(&crowp));
2086   if (size > 1) PetscCall(ISDestroy(&lcolp));
2087   PetscFunctionReturn(0);
2088 }
2089 
2090 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
2091 {
2092   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
2093   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
2094 
2095   PetscFunctionBegin;
2096   if (nghosts) *nghosts = B->nbs;
2097   if (ghosts) *ghosts = baij->garray;
2098   PetscFunctionReturn(0);
2099 }
2100 
/*
  Builds, redundantly on every process, a sequential AIJ matrix holding the
  complete block nonzero structure of the parallel BAIJ matrix A (one entry per
  bs x bs block). Only the structure is assembled; numerical values are not set.
*/
PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat)
{
  Mat          B;
  Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
  Mat_SeqAIJ  *b;
  PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
  PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
  PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));

  /* ----------------------------------------------------------------
     Tell every processor the number of nonzeros per row
  */
  PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
  /* local block-row lengths = diagonal-part length + off-diagonal-part length */
  for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
  PetscCall(PetscMalloc1(2 * size, &recvcounts));
  displs = recvcounts + size; /* single allocation holds both MPI arrays */
  for (i = 0; i < size; i++) {
    recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs;
    displs[i]     = A->rmap->range[i] / bs;
  }
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /* ---------------------------------------------------------------
     Create the sequential matrix of the same type as the local block diagonal
  */
  PetscCall(MatCreate(PETSC_COMM_SELF, &B));
  PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(B, MATSEQAIJ));
  PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
  b = (Mat_SeqAIJ *)B->data;

  /*--------------------------------------------------------------------
    Copy my part of matrix column indices over
  */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank] / bs]; /* fill directly into this process's slice of B's column array */
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend / bs - A->rmap->rstart / bs;
  cnt        = 0;
  for (i = 0; i < n; i++) {
    /* put in lower diagonal portion */
    m = bd->i[i + 1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;

    /* put in upper diagonal portion */
    while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
  }
  PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);

  /*--------------------------------------------------------------------
    Gather all column indices to all processors
  */
  for (i = 0; i < size; i++) {
    recvcounts[i] = 0;
    for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
  }
  displs[0] = 0;
  for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /*--------------------------------------------------------------------
    Assemble the matrix into useable form (note numerical values not yet set)
  */
  /* set the b->ilen (length of each row) values */
  PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
  /* set the b->i indices */
  b->i[0] = 0;
  for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
  PetscCall(PetscFree(lens));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscFree(recvcounts));

  PetscCall(MatPropagateSymmetryOptions(A, B));
  *newmat = B;
  PetscFunctionReturn(0);
}
2190 
/*
  Applies (S)SOR relaxation. Only the "local" parallel variants are supported:
  each outer iteration scatters the current xx to ghost positions, moves the
  off-process coupling to the right-hand side (bb1 = bb - B*x), and then runs
  the requested sweep on the diagonal block only.
*/
PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  Vec          bb1 = NULL;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(0);
  }

  /* bb1 is only needed when at least one sweep must subtract the off-process contribution */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero guess the first sweep needs no ghost values of xx */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported");

  PetscCall(VecDestroy(&bb1));
  PetscFunctionReturn(0);
}
2258 
2259 PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions)
2260 {
2261   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data;
2262   PetscInt     m, N, i, *garray = aij->garray;
2263   PetscInt     ib, jb, bs = A->rmap->bs;
2264   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data;
2265   MatScalar   *a_val = a_aij->a;
2266   Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data;
2267   MatScalar   *b_val = b_aij->a;
2268   PetscReal   *work;
2269 
2270   PetscFunctionBegin;
2271   PetscCall(MatGetSize(A, &m, &N));
2272   PetscCall(PetscCalloc1(N, &work));
2273   if (type == NORM_2) {
2274     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2275       for (jb = 0; jb < bs; jb++) {
2276         for (ib = 0; ib < bs; ib++) {
2277           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2278           a_val++;
2279         }
2280       }
2281     }
2282     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2283       for (jb = 0; jb < bs; jb++) {
2284         for (ib = 0; ib < bs; ib++) {
2285           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2286           b_val++;
2287         }
2288       }
2289     }
2290   } else if (type == NORM_1) {
2291     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2292       for (jb = 0; jb < bs; jb++) {
2293         for (ib = 0; ib < bs; ib++) {
2294           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2295           a_val++;
2296         }
2297       }
2298     }
2299     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2300       for (jb = 0; jb < bs; jb++) {
2301         for (ib = 0; ib < bs; ib++) {
2302           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2303           b_val++;
2304         }
2305       }
2306     }
2307   } else if (type == NORM_INFINITY) {
2308     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2309       for (jb = 0; jb < bs; jb++) {
2310         for (ib = 0; ib < bs; ib++) {
2311           int col   = A->cmap->rstart + a_aij->j[i] * bs + jb;
2312           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2313           a_val++;
2314         }
2315       }
2316     }
2317     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2318       for (jb = 0; jb < bs; jb++) {
2319         for (ib = 0; ib < bs; ib++) {
2320           int col   = garray[b_aij->j[i]] * bs + jb;
2321           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2322           b_val++;
2323         }
2324       }
2325     }
2326   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
2327     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2328       for (jb = 0; jb < bs; jb++) {
2329         for (ib = 0; ib < bs; ib++) {
2330           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
2331           a_val++;
2332         }
2333       }
2334     }
2335     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2336       for (jb = 0; jb < bs; jb++) {
2337         for (ib = 0; ib < bs; ib++) {
2338           work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
2339           b_val++;
2340         }
2341       }
2342     }
2343   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2344     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2345       for (jb = 0; jb < bs; jb++) {
2346         for (ib = 0; ib < bs; ib++) {
2347           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
2348           a_val++;
2349         }
2350       }
2351     }
2352     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2353       for (jb = 0; jb < bs; jb++) {
2354         for (ib = 0; ib < bs; ib++) {
2355           work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
2356           b_val++;
2357         }
2358       }
2359     }
2360   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
2361   if (type == NORM_INFINITY) {
2362     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
2363   } else {
2364     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
2365   }
2366   PetscCall(PetscFree(work));
2367   if (type == NORM_2) {
2368     for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2369   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2370     for (i = 0; i < N; i++) reductions[i] /= m;
2371   }
2372   PetscFunctionReturn(0);
2373 }
2374 
2375 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values)
2376 {
2377   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2378 
2379   PetscFunctionBegin;
2380   PetscCall(MatInvertBlockDiagonal(a->A, values));
2381   A->factorerrortype             = a->A->factorerrortype;
2382   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2383   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2384   PetscFunctionReturn(0);
2385 }
2386 
2387 PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a)
2388 {
2389   Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
2390   Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ *)maij->A->data;
2391 
2392   PetscFunctionBegin;
2393   if (!Y->preallocated) {
2394     PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
2395   } else if (!aij->nz) {
2396     PetscInt nonew = aij->nonew;
2397     PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
2398     aij->nonew = nonew;
2399   }
2400   PetscCall(MatShift_Basic(Y, a));
2401   PetscFunctionReturn(0);
2402 }
2403 
2404 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d)
2405 {
2406   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2407 
2408   PetscFunctionBegin;
2409   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2410   PetscCall(MatMissingDiagonal(a->A, missing, d));
2411   if (d) {
2412     PetscInt rstart;
2413     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2414     *d += rstart / A->rmap->bs;
2415   }
2416   PetscFunctionReturn(0);
2417 }
2418 
2419 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a)
2420 {
2421   PetscFunctionBegin;
2422   *a = ((Mat_MPIBAIJ *)A->data)->A;
2423   PetscFunctionReturn(0);
2424 }
2425 
/* -------------------------------------------------------------------*/
/*
   Operation table for MATMPIBAIJ. Entries are positional: the slot numbers in
   the comments correspond to the fields of struct _MatOps, and a NULL marks an
   operation this matrix type does not implement. The table is memcpy'd into
   B->ops by MatCreate_MPIBAIJ(), so entries must never be reordered.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
                                       MatGetRow_MPIBAIJ,
                                       MatRestoreRow_MPIBAIJ,
                                       MatMult_MPIBAIJ,
                                       /* 4*/ MatMultAdd_MPIBAIJ,
                                       MatMultTranspose_MPIBAIJ,
                                       MatMultTransposeAdd_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIBAIJ,
                                       MatTranspose_MPIBAIJ,
                                       /*15*/ MatGetInfo_MPIBAIJ,
                                       MatEqual_MPIBAIJ,
                                       MatGetDiagonal_MPIBAIJ,
                                       MatDiagonalScale_MPIBAIJ,
                                       MatNorm_MPIBAIJ,
                                       /*20*/ MatAssemblyBegin_MPIBAIJ,
                                       MatAssemblyEnd_MPIBAIJ,
                                       MatSetOption_MPIBAIJ,
                                       MatZeroEntries_MPIBAIJ,
                                       /*24*/ MatZeroRows_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIBAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIBAIJ,
                                       MatCreateSubMatrices_MPIBAIJ,
                                       MatIncreaseOverlap_MPIBAIJ,
                                       MatGetValues_MPIBAIJ,
                                       MatCopy_MPIBAIJ,
                                       /*44*/ NULL,
                                       MatScale_MPIBAIJ,
                                       MatShift_MPIBAIJ,
                                       NULL,
                                       MatZeroRowsColumns_MPIBAIJ,
                                       /*49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIBAIJ,
                                       MatPermute_MPIBAIJ,
                                       MatSetValuesBlocked_MPIBAIJ,
                                       /*59*/ MatCreateSubMatrix_MPIBAIJ,
                                       MatDestroy_MPIBAIJ,
                                       MatView_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*74*/ NULL,
                                       MatFDColoringApply_BAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPIBAIJ,
                                       /*84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*99*/ NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIBAIJ,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIBAIJ,
                                       MatImaginaryPart_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMissingDiagonal_MPIBAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ,
                                       NULL,
                                       MatGetGhosts_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIBAIJ,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIBAIJ,
                                       MatInvertBlockDiagonal_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_Default,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       NULL,
                                       /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       NULL};
2579 
2580 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
2581 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
2582 
/*
  MatMPIBAIJSetPreallocationCSR_MPIBAIJ - Type-specific implementation of
  MatMPIBAIJSetPreallocationCSR(): preallocates from a block CSR description
  (ii/jj in block indices, local to this rank) and, when V is given (or even
  when it is not, using zeros), inserts the values and assembles the matrix.

  Two passes over the CSR structure:
    1. count diagonal-block vs off-diagonal-block columns per block row to
       build exact d_nnz/o_nnz preallocation;
    2. insert the block values with MatSetValuesBlocked_MPIBAIJ().
*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
{
  PetscInt        m, rstart, cstart, cend;
  PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
  const PetscInt *JJ          = NULL;
  PetscScalar    *values      = NULL;
  PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
  PetscBool       nooffprocentries;

  PetscFunctionBegin;
  /* fix the block size on both layouts before setting them up */
  PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
  PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
  /* all extents below are in block (not scalar) units */
  m      = B->rmap->n / bs;
  rstart = B->rmap->rstart / bs;
  cstart = B->cmap->rstart / bs;
  cend   = B->cmap->rend / bs;

  PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
  PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
  for (i = 0; i < m; i++) {
    nz = ii[i + 1] - ii[i];
    PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
    nz_max = PetscMax(nz_max, nz); /* widest row, used to size the zero buffer when V == NULL */
    dlen   = 0;
    olen   = 0;
    JJ     = jj + ii[i];
    for (j = 0; j < nz; j++) {
      /* columns outside [cstart, cend) belong to the off-diagonal part */
      if (*JJ < cstart || *JJ >= cend) olen++;
      else dlen++;
      JJ++;
    }
    d_nnz[i] = dlen;
    o_nnz[i] = olen;
  }
  PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  values = (PetscScalar *)V;
  /* with no values supplied, insert zeros to fix the nonzero structure */
  if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
  for (i = 0; i < m; i++) {
    PetscInt        row   = i + rstart;
    PetscInt        ncols = ii[i + 1] - ii[i];
    const PetscInt *icols = jj + ii[i];
    if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
      PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
    } else { /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j = 0; j < ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
        PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
      }
    }
  }

  if (!V) PetscCall(PetscFree(values));
  /* the CSR input is purely local, so skip the off-process communication during assembly */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* the structure is now exact; error on any attempt to create new nonzero locations */
  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}
2651 
2652 /*@C
2653    MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values
2654 
2655    Collective
2656 
2657    Input Parameters:
2658 +  B - the matrix
2659 .  bs - the block size
2660 .  i - the indices into j for the start of each local row (starts with zero)
2661 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2662 -  v - optional values in the matrix
2663 
2664    Level: advanced
2665 
2666    Notes:
2667     The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
2668    may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
2669    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2670    `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2671    block column and the second index is over columns within a block.
2672 
2673    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2674 
.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ`
2676 @*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* validate the matrix object and require the same bs collectively on all ranks */
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  /* dispatch to the implementation composed on this matrix type (silently a no-op if absent) */
  PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
  PetscFunctionReturn(0);
}
2686 
2687 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
2688 {
2689   Mat_MPIBAIJ *b;
2690   PetscInt     i;
2691   PetscMPIInt  size;
2692 
2693   PetscFunctionBegin;
2694   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
2695   PetscCall(PetscLayoutSetUp(B->rmap));
2696   PetscCall(PetscLayoutSetUp(B->cmap));
2697   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2698 
2699   if (d_nnz) {
2700     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
2701   }
2702   if (o_nnz) {
2703     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
2704   }
2705 
2706   b      = (Mat_MPIBAIJ *)B->data;
2707   b->bs2 = bs * bs;
2708   b->mbs = B->rmap->n / bs;
2709   b->nbs = B->cmap->n / bs;
2710   b->Mbs = B->rmap->N / bs;
2711   b->Nbs = B->cmap->N / bs;
2712 
2713   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
2714   b->rstartbs = B->rmap->rstart / bs;
2715   b->rendbs   = B->rmap->rend / bs;
2716   b->cstartbs = B->cmap->rstart / bs;
2717   b->cendbs   = B->cmap->rend / bs;
2718 
2719 #if defined(PETSC_USE_CTABLE)
2720   PetscCall(PetscHMapIDestroy(&b->colmap));
2721 #else
2722   PetscCall(PetscFree(b->colmap));
2723 #endif
2724   PetscCall(PetscFree(b->garray));
2725   PetscCall(VecDestroy(&b->lvec));
2726   PetscCall(VecScatterDestroy(&b->Mvctx));
2727 
2728   /* Because the B will have been resized we simply destroy it and create a new one each time */
2729   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2730   PetscCall(MatDestroy(&b->B));
2731   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2732   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2733   PetscCall(MatSetType(b->B, MATSEQBAIJ));
2734 
2735   if (!B->preallocated) {
2736     PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2737     PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2738     PetscCall(MatSetType(b->A, MATSEQBAIJ));
2739     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
2740   }
2741 
2742   PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
2743   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
2744   B->preallocated  = PETSC_TRUE;
2745   B->was_assembled = PETSC_FALSE;
2746   B->assembled     = PETSC_FALSE;
2747   PetscFunctionReturn(0);
2748 }
2749 
2750 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec);
2751 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal);
2752 
/*
  MatConvert_MPIBAIJ_MPIAdj - Builds an MPIAdj (adjacency-structure-only) matrix
  from the block structure of an MPIBAIJ matrix: one vertex per block row, with
  diagonal entries removed and the remaining block columns emitted in globally
  ascending order per row.

  Ownership of the ii/jj arrays allocated here is transferred to the MPIAdj
  matrix created by MatCreateMPIAdj() (values argument NULL).
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
  Mat_SeqBAIJ    *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data;
  PetscInt        M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  /* first pass: count entries per block row (diagonal part + off-diagonal part,
     minus the diagonal entry itself when present) */
  PetscCall(PetscMalloc1(M + 1, &ii));
  ii[0] = 0;
  for (i = 0; i < M; i++) {
    PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]);
    PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]);
    ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i];
    /* remove one from count of matrix has diagonal */
    for (j = id[i]; j < id[i + 1]; j++) {
      if (jd[j] == i) {
        ii[i + 1]--;
        break;
      }
    }
  }
  /* second pass: emit global block-column indices in ascending order:
     off-diagonal columns before the diagonal block, then the diagonal part
     (skipping the diagonal entry), then the remaining off-diagonal columns */
  PetscCall(PetscMalloc1(ii[M], &jj));
  cnt = 0;
  for (i = 0; i < M; i++) {
    for (j = io[i]; j < io[i + 1]; j++) {
      /* NOTE(review): split uses > rstart (row start in block units); presumably
         relies on off-diagonal columns never equaling local columns — confirm */
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
    }
    for (k = id[i]; k < id[i + 1]; k++) {
      if (jd[k] != i) jj[cnt++] = rstart + jd[k]; /* skip the diagonal entry */
    }
    for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]];
  }
  PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj));
  PetscFunctionReturn(0);
}
2790 
2791 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2792 
2793 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
2794 
/*
  MatConvert_MPIBAIJ_MPIAIJ - Converts an assembled MPIBAIJ matrix to MPIAIJ by
  converting the local diagonal and off-diagonal SeqBAIJ blocks to SeqAIJ.

  For MAT_INITIAL_MATRIX / MAT_INPLACE_MATRIX the source matrix A is first
  disassembled (MatDisAssemble_MPIBAIJ) so its off-diagonal block uses global
  column numbering, converted, and then reassembled; the symmetry/Hermitian/SPD
  flags are saved and restored around the reassembly since assembly resets them.
  MAT_INPLACE_MATRIX replaces A's contents via MatHeaderReplace().
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  Mat_MPIAIJ  *b;
  Mat          B;

  PetscFunctionBegin;
  PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    /* create an empty MPIAIJ shell with matching layout; its A/B blocks are replaced below */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetType(B, MATMPIAIJ));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
    PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
  }
  b = (Mat_MPIAIJ *)B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    /* reuse the existing SeqAIJ blocks of the destination */
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
  } else {
    /* assembly below clears these flags on A, so save and restore them */
    PetscBool3 sym = A->symmetric, hermitian = A->hermitian, structurally_symmetric = A->structurally_symmetric, spd = A->spd;
    PetscCall(MatDestroy(&b->A));
    PetscCall(MatDestroy(&b->B));
    /* switch A's off-diagonal block to global column numbering before converting it */
    PetscCall(MatDisAssemble_MPIBAIJ(A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    A->symmetric              = sym;
    A->hermitian              = hermitian;
    A->structurally_symmetric = structurally_symmetric;
    A->spd                    = spd;
  }
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(0);
}
2843 
2844 /*MC
2845    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2846 
2847    Options Database Keys:
2848 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
2849 . -mat_block_size <bs> - set the blocksize used to store the matrix
2850 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use  (0 often indicates using BLAS)
2851 - -mat_use_hash_table <fact> - set hash table factor
2852 
2853    Level: beginner
2854 
2855    Note:
2856     `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no
2857     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
2858 
.seealso: `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ()`
2860 M*/
2861 
2862 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);
2863 
/*
  MatCreate_MPIBAIJ - Constructor registered for MATMPIBAIJ: allocates the
  implementation data, installs the operation table, composes the type-specific
  methods (conversions, preallocation, hash-table control), and processes the
  -mat_use_hash_table option.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ *b;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscNew(&b));
  B->data = (void *)b;

  /* install the full MATMPIBAIJ operation table */
  PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));

  /* build local table of row and column ownerships (filled in block units at preallocation time) */
  PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff (used only when MAT_USE_HASH_TABLE is enabled) */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  /* compose type-specific methods looked up by name elsewhere in the library */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));

  PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
  PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
  if (flg) {
    PetscReal fact = 1.39; /* default hash table over-allocation factor */
    PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
    PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
    if (fact <= 1.0) fact = 1.39; /* factors <= 1 are invalid; fall back to the default */
    PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
    PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
  }
  PetscOptionsEnd();
  PetscFunctionReturn(0);
}
2943 
2944 /*MC
2945    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2946 
2947    This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
2948    and `MATMPIBAIJ` otherwise.
2949 
2950    Options Database Keys:
2951 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`
2952 
2953   Level: beginner
2954 
2955 .seealso: `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
2956 M*/
2957 
2958 /*@C
2959    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
2960    (block compressed row).  For good matrix assembly performance
2961    the user should preallocate the matrix storage by setting the parameters
2962    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2963    performance can be increased by more than a factor of 50.
2964 
2965    Collective
2966 
2967    Input Parameters:
2968 +  B - the matrix
2969 .  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2970           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2971 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2972            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
2977 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2978            submatrix (same for all local rows).
2979 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2980            off-diagonal portion of the local submatrix (possibly different for
2981            each block row) or NULL.
2982 
2983    If the *_nnz parameter is given then the *_nz parameter is ignored
2984 
2985    Options Database Keys:
2986 +   -mat_block_size - size of the blocks to use
2987 -   -mat_use_hash_table <fact> - set hash table factor
2988 
2989    Notes:
   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
2992 
2993    Storage Information:
2994    For a square global matrix we define each processor's diagonal portion
2995    to be its local rows and the corresponding columns (a square submatrix);
2996    each processor's off-diagonal portion encompasses the remainder of the
2997    local matrix (a rectangular submatrix).
2998 
2999    The user can specify preallocated storage for the diagonal part of
3000    the local submatrix with either d_nz or d_nnz (not both).  Set
3001    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3002    memory allocation.  Likewise, specify preallocated storage for the
3003    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3004 
3005    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3006    the figure below we depict these three local rows and all columns (0-11).
3007 
3008 .vb
3009            0 1 2 3 4 5 6 7 8 9 10 11
3010           --------------------------
3011    row 3  |o o o d d d o o o o  o  o
3012    row 4  |o o o d d d o o o o  o  o
3013    row 5  |o o o d d d o o o o  o  o
3014           --------------------------
3015 .ve
3016 
3017    Thus, any entries in the d locations are stored in the d (diagonal)
3018    submatrix, and any entries in the o locations are stored in the
3019    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3020    stored simply in the `MATSEQBAIJ` format for compressed row storage.
3021 
3022    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3023    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3024    In general, for PDE problems in which most nonzeros are near the diagonal,
3025    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3026    or you will get TERRIBLE performance; see the users' manual chapter on
3027    matrices.
3028 
3029    You can call `MatGetInfo()` to get information on how effective the preallocation was;
3030    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3031    You can also run with the option -info and look for messages with the string
3032    malloc in them to see if additional memory allocation was needed.
3033 
3034    Level: intermediate
3035 
3036 .seealso: `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
3037 @*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  /* Validate the matrix object; the type must already be set so that the
     type-specific implementation can be looked up by the dispatch below */
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* bs is logically collective: all ranks sharing B must pass the same value */
  PetscValidLogicalCollectiveInt(B, bs, 2);
  /* Dispatch to the composed "MatMPIBAIJSetPreallocation_C" method (set to
     MatMPIBAIJSetPreallocation_MPIBAIJ for MATMPIBAIJ); PetscTryMethod makes
     this a no-op for matrix types that do not compose the method */
  PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(0);
}
3047 
3048 /*@C
3049    MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
3050    (block compressed row).  For good matrix assembly performance
3051    the user should preallocate the matrix storage by setting the parameters
3052    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3053    performance can be increased by more than a factor of 50.
3054 
3055    Collective
3056 
3057    Input Parameters:
3058 +  comm - MPI communicator
3059 .  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3060           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3061 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
3062            This value should be the same as the local size used in creating the
3063            y vector for the matrix-vector product y = Ax.
3064 .  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
3065            This value should be the same as the local size used in creating the
3066            x vector for the matrix-vector product y = Ax.
3067 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3068 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3069 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3070            submatrix  (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
3075 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3076            submatrix (same for all local rows).
3077 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3078            off-diagonal portion of the local submatrix (possibly different for
3079            each block row) or NULL.
3080 
3081    Output Parameter:
3082 .  A - the matrix
3083 
3084    Options Database Keys:
3085 +   -mat_block_size - size of the blocks to use
3086 -   -mat_use_hash_table <fact> - set hash table factor
3087 
3088    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3089    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3090    [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
3091 
3092    Notes:
3093    If the *_nnz parameter is given then the *_nz parameter is ignored
3094 
   A nonzero block is any block that has 1 or more nonzeros in it
3096 
3097    The user MUST specify either the local or global matrix dimensions
3098    (possibly both).
3099 
   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3102 
3103    Storage Information:
3104    For a square global matrix we define each processor's diagonal portion
3105    to be its local rows and the corresponding columns (a square submatrix);
3106    each processor's off-diagonal portion encompasses the remainder of the
3107    local matrix (a rectangular submatrix).
3108 
3109    The user can specify preallocated storage for the diagonal part of
3110    the local submatrix with either d_nz or d_nnz (not both).  Set
3111    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3112    memory allocation.  Likewise, specify preallocated storage for the
3113    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3114 
3115    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3116    the figure below we depict these three local rows and all columns (0-11).
3117 
3118 .vb
3119            0 1 2 3 4 5 6 7 8 9 10 11
3120           --------------------------
3121    row 3  |o o o d d d o o o o  o  o
3122    row 4  |o o o d d d o o o o  o  o
3123    row 5  |o o o d d d o o o o  o  o
3124           --------------------------
3125 .ve
3126 
3127    Thus, any entries in the d locations are stored in the d (diagonal)
3128    submatrix, and any entries in the o locations are stored in the
3129    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3130    stored simply in the `MATSEQBAIJ` format for compressed row storage.
3131 
3132    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3133    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3134    In general, for PDE problems in which most nonzeros are near the diagonal,
3135    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3136    or you will get TERRIBLE performance; see the users' manual chapter on
3137    matrices.
3138 
3139    Level: intermediate
3140 
3141 .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
3142 @*/
3143 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
3144 {
3145   PetscMPIInt size;
3146 
3147   PetscFunctionBegin;
3148   PetscCall(MatCreate(comm, A));
3149   PetscCall(MatSetSizes(*A, m, n, M, N));
3150   PetscCallMPI(MPI_Comm_size(comm, &size));
3151   if (size > 1) {
3152     PetscCall(MatSetType(*A, MATMPIBAIJ));
3153     PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
3154   } else {
3155     PetscCall(MatSetType(*A, MATSEQBAIJ));
3156     PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
3157   }
3158   PetscFunctionReturn(0);
3159 }
3160 
/* Duplicate an MPIBAIJ matrix: builds a new matrix of the same type and sizes,
   copies all layout/bookkeeping fields, and duplicates the diagonal (A) and
   off-diagonal (B) sequential blocks, copying values or not per cpvalues. */
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat          mat;
  Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
  PetscInt     len = 0;

  PetscFunctionBegin;
  *newmat = NULL;
  /* Create the duplicate with identical local/global sizes and type */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));

  /* The duplicate is immediately marked preallocated and assembled; its
     sub-blocks are filled in by the MatDuplicate() calls at the bottom */
  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* Copy block-size bookkeeping (block dimensions in units of blocks) */
  a             = (Mat_MPIBAIJ *)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  /* Share the layouts by reference rather than rebuilding them */
  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  /* Per-rank state; row-access scratch buffers start out empty */
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;
  a->barray       = NULL;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff */
  /* The hash table itself is not copied, only its configuration flags;
     counters restart at zero for the duplicate */
  a->ht           = NULL;
  a->hd           = NULL;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
  /* Duplicate the global-to-local column map; its representation depends on
     whether PETSc was configured with ctable (hash map) or a dense array */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
#endif
  } else a->colmap = NULL;

  /* garray maps local off-diagonal block columns to global block columns;
     its length is the number of block columns of the off-diagonal part B */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
    PetscCall(PetscMalloc1(len, &a->garray));
    PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  /* Fresh block stash; communication objects (lvec, Mvctx) are duplicated/copied */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
  PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
  PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));

  /* Duplicate the sequential diagonal and off-diagonal blocks (values copied
     or not according to cpvalues) and the composed-function list */
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}
3236 
3237 /* Used for both MPIBAIJ and MPISBAIJ matrices */
/* Used for both MPIBAIJ and MPISBAIJ matrices */
/* Loads a matrix from a PETSc binary viewer: reads the AIJ-format header,
   row lengths, column indices and values, computes a block preallocation,
   then inserts the values row by row and assembles the matrix. */
PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
  PetscInt    *rowidxs, *colidxs, rs, cs, ce;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  /* header = [classid, rows, cols, nnz]; nz < 0 marks a special on-disk format */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set local sizes if not set already */
  /* for square matrices, mirror whichever local size the caller did set */
  if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
  if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetLocalSize(mat, &m, &n));
  PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
  /* local block counts (the file stores a point-wise CSR, not a blocked one) */
  mbs = m / bs;
  nbs = n / bs;

  /* read in row lengths and build row indices */
  /* rowidxs becomes a local CSR row-offset array via an in-place prefix sum */
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  /* sanity check: the row lengths over all ranks must sum to the header's nnz */
  PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));

  {                /* preallocate matrix storage */
    PetscBT    bt; /* helper bit set to count diagonal nonzeros */
    PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
    PetscBool  sbaij, done;
    PetscInt  *d_nnz, *o_nnz;

    PetscCall(PetscBTCreate(nbs, &bt));
    PetscCall(PetscHSetICreate(&ht));
    PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
    PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
    /* For each block row, count the distinct blocks touched by its bs point
       rows; the bit set tracks diagonal-part blocks, the hash set off-diagonal */
    for (i = 0; i < mbs; i++) {
      PetscCall(PetscBTMemzero(nbs, bt));
      PetscCall(PetscHSetIClear(ht));
      for (k = 0; k < bs; k++) {
        PetscInt row = bs * i + k;
        for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
          PetscInt col = colidxs[j];
          /* SBAIJ stores only the upper triangle, so skip entries below it */
          if (!sbaij || col >= row) {
            if (col >= cs && col < ce) {
              if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
            } else {
              PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
              if (done) o_nnz[i]++;
            }
          }
        }
      }
    }
    PetscCall(PetscBTDestroy(&bt));
    PetscCall(PetscHSetIDestroy(&ht));
    /* Both calls are dispatched via PetscTryMethod, so only the one matching
       the actual matrix type (BAIJ or SBAIJ) takes effect */
    PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree2(d_nnz, o_nnz));
  }

  /* store matrix values */
  /* insert one global point row at a time via the type's setvalues op */
  for (i = 0; i < m; i++) {
    PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
    PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES));
  }

  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
3338 
3339 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
3340 {
3341   PetscBool isbinary;
3342 
3343   PetscFunctionBegin;
3344   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3345   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3346   PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
3347   PetscFunctionReturn(0);
3348 }
3349 
3350 /*@
3351    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table
3352 
3353    Input Parameters:
3354 +  mat  - the matrix
3355 -  fact - factor
3356 
3357    Options Database Key:
3358 .  -mat_use_hash_table <fact> - provide the factor
3359 
3360    Level: advanced
3361 
3362 .seealso: `MATMPIBAIJ`, `MatSetOption()`
3363 @*/
PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact)
{
  PetscFunctionBegin;
  /* Dispatch to the composed "MatSetHashTableFactor_C" method (set to
     MatSetHashTableFactor_MPIBAIJ for MATMPIBAIJ); PetscTryMethod makes
     this a no-op for matrix types that do not compose the method */
  PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
  PetscFunctionReturn(0);
}
3370 
3371 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact)
3372 {
3373   Mat_MPIBAIJ *baij;
3374 
3375   PetscFunctionBegin;
3376   baij          = (Mat_MPIBAIJ *)mat->data;
3377   baij->ht_fact = fact;
3378   PetscFunctionReturn(0);
3379 }
3380 
3381 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
3382 {
3383   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3384   PetscBool    flg;
3385 
3386   PetscFunctionBegin;
3387   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
3388   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
3389   if (Ad) *Ad = a->A;
3390   if (Ao) *Ao = a->B;
3391   if (colmap) *colmap = a->garray;
3392   PetscFunctionReturn(0);
3393 }
3394 
3395 /*
3396     Special version for direct calls from Fortran (to eliminate two function call overheads
3397 */
3398 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3399   #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3400 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3401   #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3402 #endif
3403 
3404 /*@C
3405   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`
3406 
3407   Collective
3408 
3409   Input Parameters:
3410 + mat - the matrix
3411 . min - number of input rows
3412 . im - input rows
3413 . nin - number of input columns
3414 . in - input columns
3415 . v - numerical values input
3416 - addvin - `INSERT_VALUES` or `ADD_VALUES`
3417 
3418   Developer Note:
3419     This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.
3420 
3421   Level: advanced
3422 
3423 .seealso: `MatSetValuesBlocked()`
3424 @*/
/* Direct Fortran entry point replacing MatSetValuesBlocked(); per the
   developer note above, this duplicates MatSetValuesBlocked_MPIBAIJ inline
   to avoid two function-call overheads. */
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
{
  /* convert input arguments to C version */
  Mat        mat = *matin;
  PetscInt   m = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray; /* bs*bs scratch block, lazily allocated below */
  PetscBool        roworiented = baij->roworiented;
  PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  /* record/verify the insertion mode and un-assemble an assembled matrix */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));

  /* lazily allocate the bs2-sized scratch buffer used to repack one block */
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* distance (in scalars) between consecutive rows/columns of a block in v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative block rows are ignored */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* block row is locally owned */
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          /* gather the strided bs x bs block into the contiguous scratch buffer */
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2; /* rewind to the start of the scratch block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* block column in the diagonal part: insert into baij->A */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue; /* negative block columns are ignored */
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            /* translate the global block column to a local column of B via colmap */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
  #endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = (col - 1) / bs;
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            /* col < 0 means this block column is new: disassemble so B can grow.
               NOTE(review): the nonew flag is read from the diagonal block A here;
               confirm against MatSetValuesBlocked_MPIBAIJ, which this is a copy of */
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      /* off-process block row: stash it for communication at assembly time */
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
  PetscFunctionReturn(0);
}
3532 
3533 /*@
3534      MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block
3535          CSR format the local rows.
3536 
3537    Collective
3538 
3539    Input Parameters:
3540 +  comm - MPI communicator
3541 .  bs - the block size, only a block size of 1 is supported
3542 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
3543 .  n - This value should be the same as the local size used in creating the
3544        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
3545        calculated if N is given) For square matrices n is almost always m.
3546 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3547 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3548 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
3549 .   j - column indices
3550 -   a - matrix values
3551 
3552    Output Parameter:
3553 .   mat - the matrix
3554 
3555    Level: intermediate
3556 
3557    Notes:
3558        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3559      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3560      called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3561 
3562      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3563      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3564      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3565      with column-major ordering within blocks.
3566 
3567        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3568 
3569 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
3570           `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
3571 @*/
PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  /* The row-offset array must be 0-based, i.e. i[0] == 0
     (note i is dereferenced before any NULL check — callers must pass a valid array) */
  PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIBAIJ));
  PetscCall(MatSetBlockSize(*mat, bs));
  PetscCall(MatSetUp(*mat));
  /* The input blocks are column-major within each block (see Notes above),
     so insert with column orientation, then restore the default */
  PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
  PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
  PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
  PetscFunctionReturn(0);
}
3587 
3588 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3589 {
3590   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
3591   PetscInt    *indx;
3592   PetscScalar *values;
3593 
3594   PetscFunctionBegin;
3595   PetscCall(MatGetSize(inmat, &m, &N));
3596   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3597     Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data;
3598     PetscInt    *dnz, *onz, mbs, Nbs, nbs;
3599     PetscInt    *bindx, rmax = a->rmax, j;
3600     PetscMPIInt  rank, size;
3601 
3602     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3603     mbs = m / bs;
3604     Nbs = N / cbs;
3605     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
3606     nbs = n / cbs;
3607 
3608     PetscCall(PetscMalloc1(rmax, &bindx));
3609     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
3610 
3611     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3612     PetscCallMPI(MPI_Comm_rank(comm, &size));
3613     if (rank == size - 1) {
3614       /* Check sum(nbs) = Nbs */
3615       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
3616     }
3617 
3618     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
3619     for (i = 0; i < mbs; i++) {
3620       PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
3621       nnz = nnz / bs;
3622       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
3623       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
3624       PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL));
3625     }
3626     PetscCall(PetscFree(bindx));
3627 
3628     PetscCall(MatCreate(comm, outmat));
3629     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
3630     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
3631     PetscCall(MatSetType(*outmat, MATBAIJ));
3632     PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz));
3633     PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
3634     MatPreallocateEnd(dnz, onz);
3635     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3636   }
3637 
3638   /* numeric phase */
3639   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3640   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
3641 
3642   for (i = 0; i < m; i++) {
3643     PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3644     Ii = i + rstart;
3645     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
3646     PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3647   }
3648   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
3649   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
3650   PetscFunctionReturn(0);
3651 }
3652