xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 5a67bb2bab7ce296578be4e8d1213f21203fd3df)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I  "petscmat.h"  I*/
2 
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
/* Destroys an MPIBAIJ matrix: frees the diagonal (A) and off-diagonal (B)
   sequential blocks, the off-process value stashes, the off-diagonal column
   map, scatter context and work arrays, then detaches all composed methods.
   Note: the destruction order matters; mat->data is freed only after every
   member of baij has been released. */
static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  /* stash holds point entries, bstash holds block entries destined for other ranks */
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
  /* colmap is a hash map when PETSC_USE_CTABLE is defined, otherwise a dense array */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  /* hd/ht are the hash-table arrays used by the MatSetValues_*_HT paths */
  PetscCall(PetscFree2(baij->hd, baij->ht));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
47 
48 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and  MatAssemblyEnd_MPI_Hash() */
49 #define TYPE BAIJ
50 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
51 #undef TYPE
52 
53 #if defined(PETSC_HAVE_HYPRE)
54 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
55 #endif
56 
57 static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[])
58 {
59   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
60   PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
61   PetscScalar       *va, *vv;
62   Vec                vB, vA;
63   const PetscScalar *vb;
64 
65   PetscFunctionBegin;
66   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
67   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
68 
69   PetscCall(VecGetArrayWrite(vA, &va));
70   if (idx) {
71     for (i = 0; i < m; i++) {
72       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
73     }
74   }
75 
76   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
77   PetscCall(PetscMalloc1(m, &idxb));
78   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
79 
80   PetscCall(VecGetArrayWrite(v, &vv));
81   PetscCall(VecGetArrayRead(vB, &vb));
82   for (i = 0; i < m; i++) {
83     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
84       vv[i] = vb[i];
85       if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
86     } else {
87       vv[i] = va[i];
88       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
89     }
90   }
91   PetscCall(VecRestoreArrayWrite(vA, &vv));
92   PetscCall(VecRestoreArrayWrite(vA, &va));
93   PetscCall(VecRestoreArrayRead(vB, &vb));
94   PetscCall(PetscFree(idxb));
95   PetscCall(VecDestroy(&vA));
96   PetscCall(VecDestroy(&vB));
97   PetscFunctionReturn(PETSC_SUCCESS);
98 }
99 
100 static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
101 {
102   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
103 
104   PetscFunctionBegin;
105   PetscCall(MatStoreValues(aij->A));
106   PetscCall(MatStoreValues(aij->B));
107   PetscFunctionReturn(PETSC_SUCCESS);
108 }
109 
110 static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
111 {
112   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
113 
114   PetscFunctionBegin;
115   PetscCall(MatRetrieveValues(aij->A));
116   PetscCall(MatRetrieveValues(aij->B));
117   PetscFunctionReturn(PETSC_SUCCESS);
118 }
119 
120 /*
121      Local utility routine that creates a mapping from the global column
122    number to the local number in the off-diagonal part of the local
123    storage of the matrix.  This is done in a non scalable way since the
124    length of colmap equals the global matrix length.
125 */
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     nbs = B->nbs, i, bs = mat->rmap->bs; /* nbs = number of block columns actually present in B */

  PetscFunctionBegin;
  /* Both variants store values shifted by +1 so that 0 means "column not present" */
#if defined(PETSC_USE_CTABLE)
  /* hash-table variant: keys are global block columns (+1), values are local point-column offsets (+1) */
  PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap));
  for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1));
#else
  /* dense variant: one entry per global block column (non-scalable, see comment above) */
  PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
  for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
142 
/* Inserts or adds one point value at local (row,col) of the diagonal block A.
   A binary search narrows to a small window, then a linear scan finds the block
   column; if absent (and a->nonew allows it) a new zeroed block is inserted.
   Relies on the following locals being in scope at the expansion site:
   bs, bs2, A, a, aa, ai, aj, aimax, ailen, and the scratch variables
   rp, ap, bap, rmax, nrow, brow, bcol, ridx, cidx, low, high, t, _i, N.
   (orow,ocol) are the original global indices, used only in error messages. */
#define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = aj + ai[brow]; \
    ap   = aa + bs2 * ai[brow]; \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    /* binary search until the window is small enough for a linear scan */ \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  } while (0)
182 
/* Off-diagonal-block (B) twin of MatSetValues_SeqBAIJ_A_Private: inserts or
   adds one point value at local (row,col) of B, growing the row if needed.
   Relies on locals bs, bs2, B, b, ba, bi, bj, bimax, bilen plus the same
   scratch variables; (orow,ocol) are global indices for error messages only. */
#define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = bj + bi[brow]; \
    ap   = ba + bs2 * bi[brow]; \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    /* binary search until the window is small enough for a linear scan */ \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column  (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  } while (0)
222 
/* Point-entry MatSetValues for MPIBAIJ: locally-owned rows go straight into
   the A (diagonal) or B (off-diagonal) sequential block through the macros
   above; rows owned by other ranks are stashed for communication during
   assembly.  Negative row/column indices are silently skipped. */
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  MatScalar    value;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)(A)->data;
  PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar   *aa = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are ignored by convention */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      /* row is owned by this rank */
      row = im[i] - rstart_orig;
      for (j = 0; j < n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          /* column falls in the diagonal block A */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            /* after assembly B uses compressed (local) column indices; translate via colmap */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
            col = col - 1; /* colmap stores offsets shifted by +1; 0 means "absent" */
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
              /* new off-diagonal block column: fall back to uncompressed (global) indexing */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)(B)->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else {
              PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
              col += in[j] % bs; /* add the point offset within the block */
            }
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else {
      /* off-process row: stash the whole row of values for assembly-time communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
308 
/* Inserts or adds one bs-by-bs block of values v at block position (row,col)
   of a SeqBAIJ matrix.  Values are stored column-major within each block;
   when a->roworiented, the incoming v is transposed during the copy.
   (orow,ocol) are the original global block indices, used only in error
   messages.  If the block column is not present and a->nonew permits it,
   a new block is inserted (shifting later entries of the row up). */
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];       /* block column indices of this row */
  ap    = aa + bs2 * ai[row]; /* values of this row, bs2 per block */
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;
  /* binary search until the window is small enough for a linear scan */
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* block already present: add or overwrite in place */
      bap = ap + bs2 * i;
      if (roworiented) {
        /* v is row-major: stride through bap transposing on the fly */
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        /* v is column-major, same layout as storage: straight copy */
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2; /* silently drop new blocks */
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}
385 
/*
    This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
    by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine

    Block-entry MatSetValues for MPIBAIJ: each (im[i],in[j]) names one bs-by-bs
    block.  Locally-owned block rows are forwarded to the A or B sequential
    block; off-process block rows are stashed into mat->bstash.  Negative
    block indices are silently skipped.
*/
static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ *)mat->data;
  const PetscScalar *value;
  MatScalar         *barray      = baij->barray; /* bs2-sized scratch block, cached on the matrix */
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* stride (in scalars) between consecutive rows/columns of one block inside v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative block rows are ignored by convention */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i * (stepval + bs) + j) * bs;
          } else {
            value = v + (j * (stepval + bs) + i) * bs;
          }
          /* gather the strided bs-by-bs block into contiguous scratch */
          for (ii = 0; ii < bs; ii++, value += bs + stepval) {
            for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2; /* rewind to the start of the scratch block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* block column falls in the diagonal block A */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            /* after assembly B uses compressed block column indices; translate via colmap */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
  #endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = (col - 1) / bs; /* colmap stores point offsets (+1); convert back to a block index */
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
              /* new off-diagonal block column: revert B to uncompressed (global) indexing */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      /* off-process block row: stash the blocks for assembly-time communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
481 
/* Multiplicative hashing: HASH_KEY is the fractional part of the golden ratio;
   HASH(size,key,tmp) maps key into [0,size) from the fractional part of
   key*HASH_KEY.  tmp must be a PetscReal lvalue -- the macro assigns to it as
   a side effect, so do not pass an expression with its own side effects. */
#define HASH_KEY             0.6180339887
#define HASH(size, key, tmp) (tmp = (key)*HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
486 static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
487 {
488   Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
489   PetscBool    roworiented = baij->roworiented;
490   PetscInt     i, j, row, col;
491   PetscInt     rstart_orig = mat->rmap->rstart;
492   PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
493   PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
494   PetscReal    tmp;
495   MatScalar  **HD       = baij->hd, value;
496   PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;
497 
498   PetscFunctionBegin;
499   for (i = 0; i < m; i++) {
500     if (PetscDefined(USE_DEBUG)) {
501       PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
502       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
503     }
504     row = im[i];
505     if (row >= rstart_orig && row < rend_orig) {
506       for (j = 0; j < n; j++) {
507         col = in[j];
508         if (roworiented) value = v[i * n + j];
509         else value = v[i + j * m];
510         /* Look up PetscInto the Hash Table */
511         key = (row / bs) * Nbs + (col / bs) + 1;
512         h1  = HASH(size, key, tmp);
513 
514         idx = h1;
515         if (PetscDefined(USE_DEBUG)) {
516           insert_ct++;
517           total_ct++;
518           if (HT[idx] != key) {
519             for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
520               ;
521             if (idx == size) {
522               for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
523                 ;
524               PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
525             }
526           }
527         } else if (HT[idx] != key) {
528           for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
529             ;
530           if (idx == size) {
531             for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
532               ;
533             PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
534           }
535         }
536         /* A HASH table entry is found, so insert the values at the correct address */
537         if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
538         else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
539       }
540     } else if (!baij->donotstash) {
541       if (roworiented) {
542         PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
543       } else {
544         PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
545       }
546     }
547   }
548   if (PetscDefined(USE_DEBUG)) {
549     baij->ht_total_ct += total_ct;
550     baij->ht_insert_ct += insert_ct;
551   }
552   PetscFunctionReturn(PETSC_SUCCESS);
553 }
554 
555 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
556 {
557   Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
558   PetscBool          roworiented = baij->roworiented;
559   PetscInt           i, j, ii, jj, row, col;
560   PetscInt           rstart = baij->rstartbs;
561   PetscInt           rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
562   PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
563   PetscReal          tmp;
564   MatScalar        **HD = baij->hd, *baij_a;
565   const PetscScalar *v_t, *value;
566   PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;
567 
568   PetscFunctionBegin;
569   if (roworiented) stepval = (n - 1) * bs;
570   else stepval = (m - 1) * bs;
571 
572   for (i = 0; i < m; i++) {
573     if (PetscDefined(USE_DEBUG)) {
574       PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
575       PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
576     }
577     row = im[i];
578     v_t = v + i * nbs2;
579     if (row >= rstart && row < rend) {
580       for (j = 0; j < n; j++) {
581         col = in[j];
582 
583         /* Look up into the Hash Table */
584         key = row * Nbs + col + 1;
585         h1  = HASH(size, key, tmp);
586 
587         idx = h1;
588         if (PetscDefined(USE_DEBUG)) {
589           total_ct++;
590           insert_ct++;
591           if (HT[idx] != key) {
592             for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
593               ;
594             if (idx == size) {
595               for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
596                 ;
597               PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
598             }
599           }
600         } else if (HT[idx] != key) {
601           for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
602             ;
603           if (idx == size) {
604             for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
605               ;
606             PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
607           }
608         }
609         baij_a = HD[idx];
610         if (roworiented) {
611           /*value = v + i*(stepval+bs)*bs + j*bs;*/
612           /* value = v + (i*(stepval+bs)+j)*bs; */
613           value = v_t;
614           v_t += bs;
615           if (addv == ADD_VALUES) {
616             for (ii = 0; ii < bs; ii++, value += stepval) {
617               for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
618             }
619           } else {
620             for (ii = 0; ii < bs; ii++, value += stepval) {
621               for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
622             }
623           }
624         } else {
625           value = v + j * (stepval + bs) * bs + i * bs;
626           if (addv == ADD_VALUES) {
627             for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
628               for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
629             }
630           } else {
631             for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
632               for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
633             }
634           }
635         }
636       }
637     } else {
638       if (!baij->donotstash) {
639         if (roworiented) {
640           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
641         } else {
642           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
643         }
644       }
645     }
646   }
647   if (PetscDefined(USE_DEBUG)) {
648     baij->ht_total_ct += total_ct;
649     baij->ht_insert_ct += insert_ct;
650   }
651   PetscFunctionReturn(PETSC_SUCCESS);
652 }
653 
/* Retrieves entries (idxm[i], idxn[j]) into v (row-major, m x n).  Only rows
   owned by this rank are supported; columns outside the diagonal block are
   located in B through the colmap, and positions not stored in B yield 0.0.
   Negative row or column indices are skipped (their v slots are untouched). */
static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          /* column lies in the diagonal block A */
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          /* column lies in the off-diagonal block B: translate via colmap */
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
          data--; /* colmap stores offsets shifted by +1; -1 now means "absent" */
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          /* absent block column (or stale map entry) reads as an explicit zero */
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
691 
/* Computes a norm of a parallel BAIJ matrix by combining the contributions of the
   local diagonal block (A) and the off-diagonal block (B) on each process, then
   reducing across the communicator. Supports NORM_FROBENIUS, NORM_1 (max column
   sum), and NORM_INFINITY (max row sum). */
static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
  PetscReal    sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    /* Single process: the diagonal block is the whole matrix */
    PetscCall(MatNorm(baij->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      /* Sum |a_ij|^2 over all stored entries of A and B, then reduce and take sqrt */
      v  = amat->a;
      nz = amat->nz * bs2; /* number of scalar entries = block count * block size squared */
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v  = bmat->a;
      nz = bmat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      /* tmp accumulates per-global-column absolute sums locally; tmp2 receives the
         reduced sums, whose maximum is the 1-norm */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = baij->garray, cstart = baij->rstartbs;
      PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N, &tmp2));
      v  = amat->a;
      jj = amat->j;
      for (i = 0; i < amat->nz; i++) {
        /* Each stored block contributes to bs consecutive global columns;
           values inside a block are stored column-by-column */
        for (j = 0; j < bs; j++) {
          col = bs * (cstart + *jj) + j; /* column index */
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      v  = bmat->a;
      jj = bmat->j;
      for (i = 0; i < bmat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * garray[*jj] + j; /* garray maps B's local block column to the global block column */
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = 0.0;
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
    } else if (type == NORM_INFINITY) { /* max row sum */
      /* sums[] holds the running absolute row sums for the bs scalar rows of one block row */
      PetscReal *sums;
      PetscCall(PetscMalloc1(bs, &sums));
      sum = 0.0;
      for (j = 0; j < amat->mbs; j++) {
        for (row = 0; row < bs; row++) sums[row] = 0.0;
        /* Accumulate the diagonal-block contribution of block row j */
        v  = amat->a + bs2 * amat->i[j];
        nz = amat->i[j + 1] - amat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        /* Accumulate the off-diagonal-block contribution of the same block row */
        v  = bmat->a + bs2 * bmat->i[j];
        nz = bmat->i[j + 1] - bmat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        for (row = 0; row < bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscFree(sums));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
791 
792 /*
793   Creates the hash table, and sets the table
794   This table is created only once.
795   If new entried need to be added to the matrix
796   then the hash table has to be destroyed and
797   recreated.
798 */
799 static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
800 {
801   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
802   Mat          A = baij->A, B = baij->B;
803   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
804   PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
805   PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
806   PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
807   PetscInt    *HT, key;
808   MatScalar  **HD;
809   PetscReal    tmp;
810 #if defined(PETSC_USE_INFO)
811   PetscInt ct = 0, max = 0;
812 #endif
813 
814   PetscFunctionBegin;
815   if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS);
816 
817   baij->ht_size = (PetscInt)(factor * nz);
818   ht_size       = baij->ht_size;
819 
820   /* Allocate Memory for Hash Table */
821   PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
822   HD = baij->hd;
823   HT = baij->ht;
824 
825   /* Loop Over A */
826   for (i = 0; i < a->mbs; i++) {
827     for (j = ai[i]; j < ai[i + 1]; j++) {
828       row = i + rstart;
829       col = aj[j] + cstart;
830 
831       key = row * Nbs + col + 1;
832       h1  = HASH(ht_size, key, tmp);
833       for (k = 0; k < ht_size; k++) {
834         if (!HT[(h1 + k) % ht_size]) {
835           HT[(h1 + k) % ht_size] = key;
836           HD[(h1 + k) % ht_size] = a->a + j * bs2;
837           break;
838 #if defined(PETSC_USE_INFO)
839         } else {
840           ct++;
841 #endif
842         }
843       }
844 #if defined(PETSC_USE_INFO)
845       if (k > max) max = k;
846 #endif
847     }
848   }
849   /* Loop Over B */
850   for (i = 0; i < b->mbs; i++) {
851     for (j = bi[i]; j < bi[i + 1]; j++) {
852       row = i + rstart;
853       col = garray[bj[j]];
854       key = row * Nbs + col + 1;
855       h1  = HASH(ht_size, key, tmp);
856       for (k = 0; k < ht_size; k++) {
857         if (!HT[(h1 + k) % ht_size]) {
858           HT[(h1 + k) % ht_size] = key;
859           HD[(h1 + k) % ht_size] = b->a + j * bs2;
860           break;
861 #if defined(PETSC_USE_INFO)
862         } else {
863           ct++;
864 #endif
865         }
866       }
867 #if defined(PETSC_USE_INFO)
868       if (k > max) max = k;
869 #endif
870     }
871   }
872 
873   /* Print Summary */
874 #if defined(PETSC_USE_INFO)
875   for (i = 0, j = 0; i < ht_size; i++) {
876     if (HT[i]) j++;
877   }
878   PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max));
879 #endif
880   PetscFunctionReturn(PETSC_SUCCESS);
881 }
882 
883 static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
884 {
885   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
886   PetscInt     nstash, reallocs;
887 
888   PetscFunctionBegin;
889   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
890 
891   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
892   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
893   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
894   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
895   PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
896   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
897   PetscFunctionReturn(PETSC_SUCCESS);
898 }
899 
/* Completes assembly: drains the scalar and block stashes into the matrix,
   assembles the sequential diagonal (A) and off-diagonal (B) blocks, coordinates
   disassembly across processes, and optionally installs the hash-table
   MatSetValues() kernels. */
static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
  PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt    *row, *col;
  PetscBool    r1, r2, r3, other_disassembled;
  MatScalar   *val;
  PetscMPIInt  n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* Drain the point-wise stash: receive messages until none remain */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ *)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* val is indexed by blocks (bs2 scalars per stashed block entry) */
        PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
  }

  /* First final assembly: build the scatter/communication structures used by MatMult() */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    /* Report and reset hash-table search statistics */
    PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* If requested (MAT_USE_HASH_TABLE), build the hash table once and switch to the
     hash-table MatSetValues() kernels */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* Work arrays handed out by MatGetRow() are stale after assembly */
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1008 
1009 extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer);
1010 #include <petscdraw.h>
/* Views a parallel BAIJ matrix on an ASCII, draw, or socket viewer. For ASCII
   info formats only summary statistics are printed; otherwise the entire matrix
   is gathered onto rank 0 (via a temporary MPIBAIJ matrix with all rows on rank 0)
   and viewed there. */
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ      *baij = (Mat_MPIBAIJ *)mat->data;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* Per-process summary: local sizes, nonzeros of diagonal/off-diagonal parts */
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, (double)info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  if (isdraw) {
    /* Nothing to do for a null draw context */
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqBAIJ *Aloc;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      /* Rank 0 owns everything; the other ranks own nothing */
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      /* rvals holds the bs consecutive global scalar rows of block row i */
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          /* Blocks are stored column-by-column: insert one scalar column at a time */
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = baij->garray[aj[j]] * bs; /* garray maps local to global block column */
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      /* On rank 0 the whole matrix sits in the diagonal block of the temporary matrix */
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)(A->data))->A, matname));
      PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1131 
/* Used for both MPIBAIJ and MPISBAIJ matrices */
/* Writes the matrix to a binary viewer in the standard PETSc binary format:
   header (classid, M, N, total nz), per-row lengths, column indices, then values.
   Within each scalar row the B entries with columns left of the diagonal block are
   emitted first, then the A (diagonal-block) entries, then the remaining B entries,
   so columns appear in ascending order. */
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
  Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt        header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l;
  PetscInt64      nz, hnz;
  PetscInt       *rowlens, *colidxs;
  PetscScalar    *matvals;
  PetscMPIInt     rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs * bs * (A->nz + B->nz); /* local scalar nonzero count (64-bit to avoid overflow) */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (cnt = 0, i = 0; i < A->mbs; i++)
    for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      /* B entries whose global block column precedes the diagonal block */
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
      }
      /* diagonal-block (A) entries, shifted to global columns */
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
      /* remaining B entries (columns past the diagonal block) */
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      /* same B-A-B interleave as the column indices; blocks are stored column-major,
         so scalar (k,l) of block jb sits at a[bs * (bs * jb + l) + k] */
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
    }
  }
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1211 
1212 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
1213 {
1214   PetscBool iascii, isdraw, issocket, isbinary;
1215 
1216   PetscFunctionBegin;
1217   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1218   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1219   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1220   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1221   if (iascii || isdraw || issocket) {
1222     PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
1223   } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
1224   PetscFunctionReturn(PETSC_SUCCESS);
1225 }
1226 
/* y = A*x. Overlaps communication of the off-process entries of x with the
   local product of the diagonal block, then adds the off-diagonal contribution. */
static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  PetscInt     nt;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
  PetscCall(VecGetLocalSize(yy, &nt));
  PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy");
  PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); /* start gathering ghost values of x into lvec */
  PetscCall((*a->A->ops->mult)(a->A, xx, yy));                                       /* y = A_diag * x while the scatter is in flight */
  PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy)); /* y += B_off * x_ghost */
  PetscFunctionReturn(PETSC_SUCCESS);
}
1243 
/* z = y + A*x, overlapping the ghost-value scatter with the diagonal-block product. */
static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); /* start gathering ghost values of x */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));                                /* z = y + A_diag * x */
  PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); /* z += B_off * x_ghost */
  PetscFunctionReturn(PETSC_SUCCESS);
}
1255 
/* y = A^T*x. The off-diagonal transpose product lands in the ghost vector and is
   scattered back (reverse, additive) onto the owning processes. */
static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1270 
/* z = y + A^T*x, using the same reverse-scatter accumulation as MatMultTranspose. */
static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1285 
1286 /*
1287   This only works correctly for square matrices where the subblock A->A is the
1288    diagonal block
1289 */
1290 static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
1291 {
1292   PetscFunctionBegin;
1293   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1294   PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
1295   PetscFunctionReturn(PETSC_SUCCESS);
1296 }
1297 
1298 static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
1299 {
1300   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1301 
1302   PetscFunctionBegin;
1303   PetscCall(MatScale(a->A, aa));
1304   PetscCall(MatScale(a->B, aa));
1305   PetscFunctionReturn(PETSC_SUCCESS);
1306 }
1307 
/* Returns one (local) row of the parallel matrix, merging the diagonal-block (A)
   and off-diagonal-block (B) entries so that column indices appear in increasing
   global order. The returned arrays are matrix-owned work space; callers must pair
   with MatRestoreRow_MPIBAIJ(). */
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;

  PetscFunctionBegin;
  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt     max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      /* block count of row i across both parts */
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }
  lrow = row - brstart; /* local scalar row index */

  /* Only request from the sequential blocks what the caller asked for */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray; /* local-to-global block column map for B */
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column precedes the diagonal block;
         output order is B[0..imark), all of A, then B[imark..nzB) */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying values */
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1390 
1391 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1392 {
1393   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1394 
1395   PetscFunctionBegin;
1396   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
1397   baij->getrowactive = PETSC_FALSE;
1398   PetscFunctionReturn(PETSC_SUCCESS);
1399 }
1400 
1401 static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1402 {
1403   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1404 
1405   PetscFunctionBegin;
1406   PetscCall(MatZeroEntries(l->A));
1407   PetscCall(MatZeroEntries(l->B));
1408   PetscFunctionReturn(PETSC_SUCCESS);
1409 }
1410 
/* Gathers matrix statistics by summing the local contributions of the diagonal (A)
   and off-diagonal (B) blocks; for MAT_GLOBAL_MAX / MAT_GLOBAL_SUM the per-process
   totals are reduced across the communicator. */
static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
  Mat            A = a->A, B = a->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  /* local statistics of the diagonal block */
  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  /* add the off-diagonal block's statistics */
  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;

  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1464 
/* Sets a matrix option, forwarding to the sequential diagonal (A) and
   off-diagonal (B) blocks where the option applies to them. */
static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* options handled entirely by the sequential blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    /* recorded here as well, since the parallel MatSetValues() consults it */
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    /* hash table is built at the next final assembly; 1.39 is the default oversize factor */
    a->ht_flag = flg;
    a->ht_fact = 1.39;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1512 
/* Builds the transpose of a parallel BAIJ matrix by walking the local
   diagonal (A) and off-diagonal (B) blocks and inserting each stored block
   column-by-column at the transposed position of the result. */
static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *Aloc;
  Mat          B;
  PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
  PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
  MatScalar   *a;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* create the result with row/column layouts swapped relative to A */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    /* Do not know preallocation information, but must set block size */
    PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ *)baij->A->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  PetscCall(PetscMalloc1(bs, &rvals));

  for (i = 0; i < mbs; i++) {
    /* rvals[] holds the bs consecutive global point rows of block row i */
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      col = (baij->cstartbs + aj[j]) * bs;
      for (k = 0; k < bs; k++) {
        /* each stride of bs values in 'a' becomes one row of the transpose */
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));

        col++;
        a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ *)baij->B->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      /* garray maps compressed off-diagonal block columns back to global block columns */
      col = baij->garray[aj[j]] * bs;
      for (k = 0; k < bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
        col++;
        a += bs;
      }
    }
  }
  PetscCall(PetscFree(rvals));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  /* for MAT_INPLACE_MATRIX the freshly built transpose replaces A in place */
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else PetscCall(MatHeaderMerge(A, &B));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1579 
/* Computes mat = diag(ll) * mat * diag(rr); either vector may be NULL.
   The scatter of the off-process entries of rr is overlapped with the
   scaling of the local blocks. */
static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          a = baij->A, b = baij->B;
  PetscInt     s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    /* left scaling of the off-diagonal block needs only locally owned ll entries */
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale  the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1609 
/* Zeros the given global rows (optionally placing 'diag' on the diagonal) and,
   when x and b are supplied, fixes the right-hand side so the zeroed rows keep
   the solution values in x. */
static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
  PetscInt    *lrows;
  PetscInt     r, len;
  PetscBool    cong;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    /* b_i = diag * x_i for every locally owned zeroed row */
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  if ((diag != 0.0) && cong) {
    /* congruent layouts: the sequential routine can set the diagonal directly */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
  } else if (diag != 0.0) {
    /* non-congruent layouts: zero first, then insert diagonal entries one by one */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
    PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
       MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  } else {
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1667 
/* Zeros the given global rows AND the matching columns. Row ownership is
   resolved with a PetscSF reduction; the off-diagonal block is then cleaned
   up by direct surgery on its BAIJ value array using a scattered 0/1 mask
   to identify removed columns. */
static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n, p = 0;
  PetscInt           i, j, k, r, len = 0, row, col, count;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask;
  Vec                xmask, lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off-diagonal part of matrix */
  /* build a mask vector that is 1 in every zeroed row, then scatter it to
     ghost ordering so we can recognize removed columns of the B block */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x) {
    /* ghost values of x are needed below to correct the right-hand side */
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off-diagonal matrix */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
    /* point at this row's slot inside each bs x bs block, stepping bs per entry */
    aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa += bs;
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i / bs;
    for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs * baij->j[j] + k;
        if (PetscAbsScalar(mask[col])) {
          aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k;
          /* move the eliminated contribution to the right-hand side before zeroing */
          if (x) bb[i] -= aa[0] * xx[col];
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1763 
1764 static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1765 {
1766   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1767 
1768   PetscFunctionBegin;
1769   PetscCall(MatSetUnfactored(a->A));
1770   PetscFunctionReturn(PETSC_SUCCESS);
1771 }
1772 
1773 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);
1774 
1775 static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag)
1776 {
1777   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
1778   Mat          a, b, c, d;
1779   PetscBool    flg;
1780 
1781   PetscFunctionBegin;
1782   a = matA->A;
1783   b = matA->B;
1784   c = matB->A;
1785   d = matB->B;
1786 
1787   PetscCall(MatEqual(a, c, &flg));
1788   if (flg) PetscCall(MatEqual(b, d, &flg));
1789   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1790   PetscFunctionReturn(PETSC_SUCCESS);
1791 }
1792 
1793 static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str)
1794 {
1795   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1796   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
1797 
1798   PetscFunctionBegin;
1799   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1800   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1801     PetscCall(MatCopy_Basic(A, B, str));
1802   } else {
1803     PetscCall(MatCopy(a->A, b->A, str));
1804     PetscCall(MatCopy(a->B, b->B, str));
1805   }
1806   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1807   PetscFunctionReturn(PETSC_SUCCESS);
1808 }
1809 
1810 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
1811 {
1812   PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs;
1813   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
1814   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
1815 
1816   PetscFunctionBegin;
1817   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
1818   PetscFunctionReturn(PETSC_SUCCESS);
1819 }
1820 
/* Computes Y += a*X for parallel BAIJ matrices, with fast paths keyed on the
   relationship between the two nonzero patterns. */
static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
  PetscBLASInt bnz, one                         = 1;
  Mat_SeqBAIJ *x, *y;
  PetscInt     bs2 = Y->rmap->bs * Y->rmap->bs;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    /* identical patterns: add the raw value arrays of both blocks with BLAS axpy */
    PetscScalar alpha = a;
    x                 = (Mat_SeqBAIJ *)xx->A->data;
    y                 = (Mat_SeqBAIJ *)yy->A->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    x = (Mat_SeqBAIJ *)xx->B->data;
    y = (Mat_SeqBAIJ *)yy->B->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    /* unrelated patterns: build a new matrix preallocated for the union pattern,
       compute the sum into it, then replace Y's innards with it */
    Mat       B;
    PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
    PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
    PetscCall(MatSetType(B, MATMPIBAIJ));
    PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
    /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1863 
1864 static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
1865 {
1866   PetscFunctionBegin;
1867   if (PetscDefined(USE_COMPLEX)) {
1868     Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;
1869 
1870     PetscCall(MatConjugate_SeqBAIJ(a->A));
1871     PetscCall(MatConjugate_SeqBAIJ(a->B));
1872   }
1873   PetscFunctionReturn(PETSC_SUCCESS);
1874 }
1875 
1876 static PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1877 {
1878   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1879 
1880   PetscFunctionBegin;
1881   PetscCall(MatRealPart(a->A));
1882   PetscCall(MatRealPart(a->B));
1883   PetscFunctionReturn(PETSC_SUCCESS);
1884 }
1885 
1886 static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1887 {
1888   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1889 
1890   PetscFunctionBegin;
1891   PetscCall(MatImaginaryPart(a->A));
1892   PetscCall(MatImaginaryPart(a->B));
1893   PetscFunctionReturn(PETSC_SUCCESS);
1894 }
1895 
1896 static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1897 {
1898   IS       iscol_local;
1899   PetscInt csize;
1900 
1901   PetscFunctionBegin;
1902   PetscCall(ISGetLocalSize(iscol, &csize));
1903   if (call == MAT_REUSE_MATRIX) {
1904     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1905     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1906   } else {
1907     PetscCall(ISAllGather(iscol, &iscol_local));
1908   }
1909   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE));
1910   if (call == MAT_INITIAL_MATRIX) {
1911     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1912     PetscCall(ISDestroy(&iscol_local));
1913   }
1914   PetscFunctionReturn(PETSC_SUCCESS);
1915 }
1916 
/*
  Not great since it makes two copies of the submatrix, first a SeqBAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
  This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
*/
/* Core submatrix extraction shared by BAIJ and SBAIJ: gathers the local
   pieces with MatCreateSubMatrices_MPIBAIJ_local(), then assembles them into
   one parallel matrix with the requested local column layout ('csize', or
   PETSC_DECIDE). 'sym' selects the symmetric (SBAIJ) variant. */
PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym)
{
  PetscMPIInt  rank, size;
  PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
  PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat          M, Mreuse;
  MatScalar   *vwork, *aa;
  MPI_Comm     comm;
  IS           isrow_new, iscol_new;
  Mat_SeqBAIJ *aij;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* The compression and expansion should be avoided. It does not report
     errors and might change the indices, hence buggy. */
  PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
  if (isrow == iscol) {
    /* same IS object: share the compressed IS instead of compressing twice */
    iscol_new = isrow_new;
    PetscCall(PetscObjectReference((PetscObject)iscol_new));
  } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));

  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym));
  } else {
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym));
  }
  PetscCall(ISDestroy(&isrow_new));
  PetscCall(ISDestroy(&iscol_new));
  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetSize(Mreuse, &m, &n));
  /* convert point counts to block counts */
  m = m / bs;
  n = n / bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n * bs) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize / bs;
    }
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
    PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ));
    /* only the preallocation call matching M's actual type takes effect; the other is a no-op */
    PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(PetscFree2(dlens, olens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqBAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i = 0; i < m; i++) {
    row   = rstart / bs + i;
    nz    = ii[i + 1] - ii[i];
    /* cwork/vwork point at this block row's column indices and bs2-sized value blocks */
    cwork = jj;
    jj += nz;
    vwork = aa;
    aa += nz * bs * bs;
    PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(PetscObjectDereference((PetscObject)Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2051 
/* Permutes rows and columns of A: builds a row permutation IS on A's
   communicator and a fully gathered column permutation, then extracts the
   permuted matrix as a submatrix. */
static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  MPI_Comm        comm, pcomm;
  PetscInt        clocal_size, nrows;
  const PetscInt *rows;
  PetscMPIInt     size;
  IS              crowp, lcolp;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  /* make a collective version of 'rowp' */
  PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
  if (pcomm == comm) {
    crowp = rowp;
  } else {
    /* rowp lives on a different communicator; recreate it on A's communicator */
    PetscCall(ISGetSize(rowp, &nrows));
    PetscCall(ISGetIndices(rowp, &rows));
    PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
    PetscCall(ISRestoreIndices(rowp, &rows));
  }
  PetscCall(ISSetPermutation(crowp));
  /* make a local version of 'colp' */
  PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
  PetscCallMPI(MPI_Comm_size(pcomm, &size));
  if (size == 1) {
    lcolp = colp;
  } else {
    /* every rank needs the entire column permutation */
    PetscCall(ISAllGather(colp, &lcolp));
  }
  PetscCall(ISSetPermutation(lcolp));
  /* now we just get the submatrix */
  PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE));
  /* clean up */
  if (pcomm != comm) PetscCall(ISDestroy(&crowp));
  if (size > 1) PetscCall(ISDestroy(&lcolp));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2090 
2091 static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
2092 {
2093   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
2094   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
2095 
2096   PetscFunctionBegin;
2097   if (nghosts) *nghosts = B->nbs;
2098   if (ghosts) *ghosts = baij->garray;
2099   PetscFunctionReturn(PETSC_SUCCESS);
2100 }
2101 
/* Builds, on every rank, a sequential AIJ matrix (one entry per block) holding
   the complete nonzero block structure of the parallel matrix; numerical
   values are not filled in. Uses two MPI_Allgatherv calls: one for row lengths
   and one for the column indices. */
static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat)
{
  Mat          B;
  Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
  Mat_SeqAIJ  *b;
  PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
  PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
  PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));

  /*   Tell every processor the number of nonzeros per row  */
  PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
  for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
  /* one buffer serves as both recvcounts and displs (block-row counts per rank) */
  PetscCall(PetscMalloc1(2 * size, &recvcounts));
  displs = recvcounts + size;
  for (i = 0; i < size; i++) {
    recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs;
    displs[i]     = A->rmap->range[i] / bs;
  }
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /* Create the sequential matrix of the same type as the local block diagonal  */
  PetscCall(MatCreate(PETSC_COMM_SELF, &B));
  PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(B, MATSEQAIJ));
  PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
  b = (Mat_SeqAIJ *)B->data;

  /*     Copy my part of matrix column indices over  */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank] / bs];
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend / bs - A->rmap->rstart / bs;
  cnt        = 0;
  /* merge the three column ranges of each block row in ascending global order:
     off-diagonal columns below the diagonal block, the diagonal block itself,
     then off-diagonal columns above it */
  for (i = 0; i < n; i++) {
    /* put in lower diagonal portion */
    m = bd->i[i + 1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;

    /* put in upper diagonal portion */
    while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
  }
  PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);

  /*  Gather all column indices to all processors  */
  for (i = 0; i < size; i++) {
    recvcounts[i] = 0;
    for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
  }
  displs[0] = 0;
  for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /*  Assemble the matrix into useable form (note numerical values not yet set)  */
  /* set the b->ilen (length of each row) values */
  PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
  /* set the b->i indices */
  b->i[0] = 0;
  for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
  PetscCall(PetscFree(lens));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscFree(recvcounts));

  PetscCall(MatPropagateSymmetryOptions(A, B));
  *newmat = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2181 
/* Block-Jacobi/local-SOR smoothing for the parallel matrix: each outer
   iteration scatters the current solution to ghost ordering, forms
   bb1 = bb - B*x (the rhs with off-process coupling moved to the right),
   and runs the requested local sweep on the diagonal block. Only the
   SOR_LOCAL_* variants are supported in parallel. */
static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  Vec          bb1 = NULL;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    /* apply-upper acts purely on the local diagonal block */
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* a work vector is needed unless the single zero-initial-guess sweep suffices */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero guess the off-process contribution is zero: sweep directly on bb */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported");

  PetscCall(VecDestroy(&bb1));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2249 
2250 static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions)
2251 {
2252   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data;
2253   PetscInt     m, N, i, *garray = aij->garray;
2254   PetscInt     ib, jb, bs = A->rmap->bs;
2255   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data;
2256   MatScalar   *a_val = a_aij->a;
2257   Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data;
2258   MatScalar   *b_val = b_aij->a;
2259   PetscReal   *work;
2260 
2261   PetscFunctionBegin;
2262   PetscCall(MatGetSize(A, &m, &N));
2263   PetscCall(PetscCalloc1(N, &work));
2264   if (type == NORM_2) {
2265     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2266       for (jb = 0; jb < bs; jb++) {
2267         for (ib = 0; ib < bs; ib++) {
2268           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2269           a_val++;
2270         }
2271       }
2272     }
2273     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2274       for (jb = 0; jb < bs; jb++) {
2275         for (ib = 0; ib < bs; ib++) {
2276           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2277           b_val++;
2278         }
2279       }
2280     }
2281   } else if (type == NORM_1) {
2282     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2283       for (jb = 0; jb < bs; jb++) {
2284         for (ib = 0; ib < bs; ib++) {
2285           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2286           a_val++;
2287         }
2288       }
2289     }
2290     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2291       for (jb = 0; jb < bs; jb++) {
2292         for (ib = 0; ib < bs; ib++) {
2293           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2294           b_val++;
2295         }
2296       }
2297     }
2298   } else if (type == NORM_INFINITY) {
2299     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2300       for (jb = 0; jb < bs; jb++) {
2301         for (ib = 0; ib < bs; ib++) {
2302           int col   = A->cmap->rstart + a_aij->j[i] * bs + jb;
2303           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2304           a_val++;
2305         }
2306       }
2307     }
2308     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2309       for (jb = 0; jb < bs; jb++) {
2310         for (ib = 0; ib < bs; ib++) {
2311           int col   = garray[b_aij->j[i]] * bs + jb;
2312           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2313           b_val++;
2314         }
2315       }
2316     }
2317   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
2318     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2319       for (jb = 0; jb < bs; jb++) {
2320         for (ib = 0; ib < bs; ib++) {
2321           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
2322           a_val++;
2323         }
2324       }
2325     }
2326     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2327       for (jb = 0; jb < bs; jb++) {
2328         for (ib = 0; ib < bs; ib++) {
2329           work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
2330           b_val++;
2331         }
2332       }
2333     }
2334   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2335     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2336       for (jb = 0; jb < bs; jb++) {
2337         for (ib = 0; ib < bs; ib++) {
2338           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
2339           a_val++;
2340         }
2341       }
2342     }
2343     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2344       for (jb = 0; jb < bs; jb++) {
2345         for (ib = 0; ib < bs; ib++) {
2346           work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
2347           b_val++;
2348         }
2349       }
2350     }
2351   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
2352   if (type == NORM_INFINITY) {
2353     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
2354   } else {
2355     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
2356   }
2357   PetscCall(PetscFree(work));
2358   if (type == NORM_2) {
2359     for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2360   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2361     for (i = 0; i < N; i++) reductions[i] /= m;
2362   }
2363   PetscFunctionReturn(PETSC_SUCCESS);
2364 }
2365 
2366 static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values)
2367 {
2368   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2369 
2370   PetscFunctionBegin;
2371   PetscCall(MatInvertBlockDiagonal(a->A, values));
2372   A->factorerrortype             = a->A->factorerrortype;
2373   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2374   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2375   PetscFunctionReturn(PETSC_SUCCESS);
2376 }
2377 
2378 static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a)
2379 {
2380   Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
2381   Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ *)maij->A->data;
2382 
2383   PetscFunctionBegin;
2384   if (!Y->preallocated) {
2385     PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
2386   } else if (!aij->nz) {
2387     PetscInt nonew = aij->nonew;
2388     PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
2389     aij->nonew = nonew;
2390   }
2391   PetscCall(MatShift_Basic(Y, a));
2392   PetscFunctionReturn(PETSC_SUCCESS);
2393 }
2394 
2395 static PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d)
2396 {
2397   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2398 
2399   PetscFunctionBegin;
2400   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2401   PetscCall(MatMissingDiagonal(a->A, missing, d));
2402   if (d) {
2403     PetscInt rstart;
2404     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2405     *d += rstart / A->rmap->bs;
2406   }
2407   PetscFunctionReturn(PETSC_SUCCESS);
2408 }
2409 
2410 static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a)
2411 {
2412   PetscFunctionBegin;
2413   *a = ((Mat_MPIBAIJ *)A->data)->A;
2414   PetscFunctionReturn(PETSC_SUCCESS);
2415 }
2416 
2417 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep)
2418 {
2419   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2420 
2421   PetscFunctionBegin;
2422   PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2423   PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2424   PetscFunctionReturn(PETSC_SUCCESS);
2425 }
2426 
/* Method table for MATMPIBAIJ. Entries are positional and must match the slot order of
   struct _MatOps (see petsc/private/matimpl.h); the numeric comments mark every fifth slot.
   NULL means the operation is unsupported or inherited default behavior applies. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
                                       MatGetRow_MPIBAIJ,
                                       MatRestoreRow_MPIBAIJ,
                                       MatMult_MPIBAIJ,
                                       /* 4*/ MatMultAdd_MPIBAIJ,
                                       MatMultTranspose_MPIBAIJ,
                                       MatMultTransposeAdd_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIBAIJ,
                                       MatTranspose_MPIBAIJ,
                                       /*15*/ MatGetInfo_MPIBAIJ,
                                       MatEqual_MPIBAIJ,
                                       MatGetDiagonal_MPIBAIJ,
                                       MatDiagonalScale_MPIBAIJ,
                                       MatNorm_MPIBAIJ,
                                       /*20*/ MatAssemblyBegin_MPIBAIJ,
                                       MatAssemblyEnd_MPIBAIJ,
                                       MatSetOption_MPIBAIJ,
                                       MatZeroEntries_MPIBAIJ,
                                       /*24*/ MatZeroRows_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIBAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIBAIJ,
                                       MatCreateSubMatrices_MPIBAIJ,
                                       MatIncreaseOverlap_MPIBAIJ,
                                       MatGetValues_MPIBAIJ,
                                       MatCopy_MPIBAIJ,
                                       /*44*/ NULL,
                                       MatScale_MPIBAIJ,
                                       MatShift_MPIBAIJ,
                                       NULL,
                                       MatZeroRowsColumns_MPIBAIJ,
                                       /*49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIBAIJ,
                                       MatPermute_MPIBAIJ,
                                       MatSetValuesBlocked_MPIBAIJ,
                                       /*59*/ MatCreateSubMatrix_MPIBAIJ,
                                       MatDestroy_MPIBAIJ,
                                       MatView_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*74*/ NULL,
                                       MatFDColoringApply_BAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPIBAIJ,
                                       /*84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*99*/ NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIBAIJ,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIBAIJ,
                                       MatImaginaryPart_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMissingDiagonal_MPIBAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ,
                                       NULL,
                                       MatGetGhosts_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIBAIJ,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIBAIJ,
                                       MatInvertBlockDiagonal_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_Default,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       NULL,
                                       /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIBAIJ};
2579 
2580 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
2581 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
2582 
/* Type-specific implementation of MatMPIBAIJSetPreallocationCSR(): preallocates from a local
   block-CSR structure (ii, jj in block indices) and, in the same call, inserts the structure
   and the optional numerical values V, then assembles the matrix.
   Pass 1 counts diagonal- vs off-diagonal-block entries per block row to size the parts
   exactly; pass 2 inserts the values with MatSetValuesBlocked_MPIBAIJ(). */
static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
{
  PetscInt        m, rstart, cstart, cend;
  PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
  const PetscInt *JJ          = NULL;
  PetscScalar    *values      = NULL;
  PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
  PetscBool       nooffprocentries;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
  PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
  /* all extents below are in units of blocks, not scalar rows/columns */
  m      = B->rmap->n / bs;
  rstart = B->rmap->rstart / bs;
  cstart = B->cmap->rstart / bs;
  cend   = B->cmap->rend / bs;

  PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
  PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
  /* pass 1: split each block row's count into diagonal-part (column in [cstart,cend)) and
     off-diagonal-part entries; also track the longest row for the zero-fill buffer below */
  for (i = 0; i < m; i++) {
    nz = ii[i + 1] - ii[i];
    PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
    nz_max = PetscMax(nz_max, nz);
    dlen   = 0;
    olen   = 0;
    JJ     = jj + ii[i];
    for (j = 0; j < nz; j++) {
      if (*JJ < cstart || *JJ >= cend) olen++;
      else dlen++;
      JJ++;
    }
    d_nnz[i] = dlen;
    o_nnz[i] = olen;
  }
  PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  /* pass 2: insert the structure; when V is NULL use a zero buffer sized for the longest row */
  values = (PetscScalar *)V;
  if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
  for (i = 0; i < m; i++) {
    PetscInt        row   = i + rstart;
    PetscInt        ncols = ii[i + 1] - ii[i];
    const PetscInt *icols = jj + ii[i];
    if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
      PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
    } else { /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j = 0; j < ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
        PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
      }
    }
  }

  if (!V) PetscCall(PetscFree(values));
  /* the CSR input is purely local, so suppress the off-process stash exchange during assembly */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* the nonzero structure is now exact; error on any attempt to create new nonzeros later */
  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2651 
2652 /*@C
2653   MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values
2654 
2655   Collective
2656 
2657   Input Parameters:
2658 + B  - the matrix
2659 . bs - the block size
2660 . i  - the indices into `j` for the start of each local row (starts with zero)
2661 . j  - the column indices for each local row (starts with zero) these must be sorted for each row
2662 - v  - optional values in the matrix
2663 
2664   Level: advanced
2665 
2666   Notes:
2667   The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
2668   may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
2669   over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2670   `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2671   block column and the second index is over columns within a block.
2672 
2673   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2674 
2675 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ`
2676 @*/
/* Public entry point; see the manual-page comment above for the full contract.
   Validates the arguments collectively, then dispatches to the type-specific method
   (a no-op for matrix types that do not compose "MatMPIBAIJSetPreallocationCSR_C"). */
PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2); /* bs must agree on every rank of the communicator */
  PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2686 
2687 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
2688 {
2689   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
2690   PetscInt     i;
2691   PetscMPIInt  size;
2692 
2693   PetscFunctionBegin;
2694   if (B->hash_active) {
2695     B->ops[0]      = b->cops;
2696     B->hash_active = PETSC_FALSE;
2697   }
2698   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
2699   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
2700   PetscCall(PetscLayoutSetUp(B->rmap));
2701   PetscCall(PetscLayoutSetUp(B->cmap));
2702   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2703 
2704   if (d_nnz) {
2705     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
2706   }
2707   if (o_nnz) {
2708     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
2709   }
2710 
2711   b->bs2 = bs * bs;
2712   b->mbs = B->rmap->n / bs;
2713   b->nbs = B->cmap->n / bs;
2714   b->Mbs = B->rmap->N / bs;
2715   b->Nbs = B->cmap->N / bs;
2716 
2717   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
2718   b->rstartbs = B->rmap->rstart / bs;
2719   b->rendbs   = B->rmap->rend / bs;
2720   b->cstartbs = B->cmap->rstart / bs;
2721   b->cendbs   = B->cmap->rend / bs;
2722 
2723 #if defined(PETSC_USE_CTABLE)
2724   PetscCall(PetscHMapIDestroy(&b->colmap));
2725 #else
2726   PetscCall(PetscFree(b->colmap));
2727 #endif
2728   PetscCall(PetscFree(b->garray));
2729   PetscCall(VecDestroy(&b->lvec));
2730   PetscCall(VecScatterDestroy(&b->Mvctx));
2731 
2732   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2733   PetscCall(MatDestroy(&b->B));
2734   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2735   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2736   PetscCall(MatSetType(b->B, MATSEQBAIJ));
2737 
2738   PetscCall(MatDestroy(&b->A));
2739   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2740   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2741   PetscCall(MatSetType(b->A, MATSEQBAIJ));
2742 
2743   PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
2744   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
2745   B->preallocated  = PETSC_TRUE;
2746   B->was_assembled = PETSC_FALSE;
2747   B->assembled     = PETSC_FALSE;
2748   PetscFunctionReturn(PETSC_SUCCESS);
2749 }
2750 
2751 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec);
2752 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal);
2753 
/* Convert a MATMPIBAIJ matrix to a MATMPIADJ adjacency graph at block granularity: every block
   row/column becomes one vertex, and (as MPIAdj requires) diagonal blocks are omitted. */
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
  Mat_SeqBAIJ    *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data;
  PetscInt        M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  /* pass 1: row-pointer array; each block row gets its diagonal-part plus off-diagonal-part count */
  PetscCall(PetscMalloc1(M + 1, &ii));
  ii[0] = 0;
  for (i = 0; i < M; i++) {
    PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]);
    PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]);
    ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i];
    /* subtract one from the count when this block row stores its diagonal block, which is dropped */
    for (j = id[i]; j < id[i + 1]; j++) {
      if (jd[j] == i) {
        ii[i + 1]--;
        break;
      }
    }
  }
  /* pass 2: column indices in increasing global order -- off-diagonal entries left of the owned
     range, then the local (diagonal-part) entries minus the diagonal block, then the rest */
  PetscCall(PetscMalloc1(ii[M], &jj));
  cnt = 0;
  for (i = 0; i < M; i++) {
    for (j = io[i]; j < io[i + 1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
    }
    for (k = id[i]; k < id[i + 1]; k++) {
      if (jd[k] != i) jj[cnt++] = rstart + jd[k]; /* map to global block column; skip the diagonal */
    }
    for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]];
  }
  /* ii and jj are not freed here: MatCreateMPIAdj takes ownership of them -- see its docs */
  PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2791 
2792 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2793 
2794 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
2795 
/* Convert a MATMPIBAIJ matrix to MATMPIAIJ. The two sequential parts are converted with
   MatConvert_SeqBAIJ_SeqAIJ(); for a freshly built target the off-diagonal part's column
   indices are then renumbered from compressed/local to global scalar columns via garray. */
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  Mat_MPIAIJ  *b;
  Mat          B;

  PetscFunctionBegin;
  PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    /* build an empty MPIAIJ target with the same layout and block sizes */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetType(B, MATMPIAIJ));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
    PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
  }
  b = (Mat_MPIAIJ *)B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    /* reuse path: convert in place into the existing sequential parts */
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
  } else {
    PetscInt   *garray = a->garray;
    Mat_SeqAIJ *bB;
    PetscInt    bs, nnz;
    PetscCall(MatDestroy(&b->A));
    PetscCall(MatDestroy(&b->B));
    /* just clear out the data structure */
    PetscCall(MatDisAssemble_MPIAIJ(B));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));

    /* Global numbering for b->B columns: split each compressed scalar column index into its
       block (bj) and offset within the block (br), then map the block through garray */
    bB  = (Mat_SeqAIJ *)b->B->data;
    bs  = A->rmap->bs;
    nnz = bB->i[A->rmap->n];
    for (PetscInt k = 0; k < nnz; k++) {
      PetscInt bj = bB->j[k] / bs;
      PetscInt br = bB->j[k] % bs;
      bB->j[k]    = garray[bj] * bs + br;
    }
  }
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    /* in-place conversion: B's guts replace A's, and B's header is destroyed */
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2851 
2852 /*MC
2853    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2854 
2855    Options Database Keys:
2856 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
2857 . -mat_block_size <bs> - set the blocksize used to store the matrix
2858 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use  (0 often indicates using BLAS)
2859 - -mat_use_hash_table <fact> - set hash table factor
2860 
2861    Level: beginner
2862 
2863    Note:
    `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. In this case, no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored.
2866 
2867 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ`
2868 M*/
2869 
2870 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);
2871 
/* MatCreate_MPIBAIJ - Constructor registered for the MATMPIBAIJ type: allocates and zero-fills
   the Mat_MPIBAIJ implementation struct, installs the operations table, composes the
   type-specific methods, and processes the hash-table option. Preallocation happens later
   via MatMPIBAIJSetPreallocation(). */
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ *b;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscNew(&b));
  B->data      = (void *)b;
  B->ops[0]    = MatOps_Values;
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));

  /* build local table of row and column ownerships (filled in at preallocation time) */
  PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  /* register type-specific conversions and methods reachable through PetscTryMethod/PetscUseMethod */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));

  /* optional hash-table assembly mode; factors <= 1.0 fall back to the default 1.39 */
  PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
  PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
  if (flg) {
    PetscReal fact = 1.39;
    PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
    PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
    if (fact <= 1.0) fact = 1.39;
    PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
    PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
  }
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}
2950 
2951 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2952 /*MC
2953    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2954 
2955    This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
2956    and `MATMPIBAIJ` otherwise.
2957 
2958    Options Database Keys:
2959 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`
2960 
2961   Level: beginner
2962 
2963 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
2964 M*/
2965 
2966 /*@C
2967   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
2968   (block compressed row).
2969 
2970   Collective
2971 
2972   Input Parameters:
2973 + B     - the matrix
2974 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2975           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2976 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2977            submatrix  (same for all local rows)
2978 . d_nnz - array containing the number of block nonzeros in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
2980            row) or `NULL`.  If you plan to factor the matrix you must leave room for the diagonal entry and
2981            set it even if it is zero.
2982 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2983            submatrix (same for all local rows).
2984 - o_nnz - array containing the number of nonzeros in the various block rows of the
2985            off-diagonal portion of the local submatrix (possibly different for
2986            each block row) or `NULL`.
2987 
2988    If the *_nnz parameter is given then the *_nz parameter is ignored
2989 
2990   Options Database Keys:
2991 + -mat_block_size            - size of the blocks to use
2992 - -mat_use_hash_table <fact> - set hash table factor
2993 
2994   Level: intermediate
2995 
2996   Notes:
2997   For good matrix assembly performance
2998   the user should preallocate the matrix storage by setting the parameters
2999   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3000   performance can be increased by more than a factor of 50.
3001 
3002   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
  then it must be used on all processors that share the object for that argument.
3004 
3005   Storage Information:
3006   For a square global matrix we define each processor's diagonal portion
3007   to be its local rows and the corresponding columns (a square submatrix);
3008   each processor's off-diagonal portion encompasses the remainder of the
3009   local matrix (a rectangular submatrix).
3010 
3011   The user can specify preallocated storage for the diagonal part of
3012   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
3013   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3014   memory allocation.  Likewise, specify preallocated storage for the
3015   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3016 
3017   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3018   the figure below we depict these three local rows and all columns (0-11).
3019 
3020 .vb
3021            0 1 2 3 4 5 6 7 8 9 10 11
3022           --------------------------
3023    row 3  |o o o d d d o o o o  o  o
3024    row 4  |o o o d d d o o o o  o  o
3025    row 5  |o o o d d d o o o o  o  o
3026           --------------------------
3027 .ve
3028 
3029   Thus, any entries in the d locations are stored in the d (diagonal)
3030   submatrix, and any entries in the o locations are stored in the
3031   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3032   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3033 
3034   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3035   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3036   In general, for PDE problems in which most nonzeros are near the diagonal,
3037   one expects `d_nz` >> `o_nz`.
3038 
3039   You can call `MatGetInfo()` to get information on how effective the preallocation was;
3040   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3041   You can also run with the option `-info` and look for messages with the string
3042   malloc in them to see if additional memory allocation was needed.
3043 
3044 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
3045 @*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* bs is logically collective: it must be the same on every rank sharing B */
  PetscValidLogicalCollectiveInt(B, bs, 2);
  /* dispatch to the implementation composed by the matrix type under
     "MatMPIBAIJSetPreallocation_C"; a no-op for types that do not provide one */
  PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3055 
3056 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
3057 /*@C
3058   MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
3059   (block compressed row).
3060 
3061   Collective
3062 
3063   Input Parameters:
3064 + comm  - MPI communicator
3065 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3066           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3067 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
3068            This value should be the same as the local size used in creating the
3069            y vector for the matrix-vector product y = Ax.
3070 . n     - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
3071            This value should be the same as the local size used in creating the
3072            x vector for the matrix-vector product y = Ax.
3073 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3074 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3075 . d_nz  - number of nonzero blocks per block row in diagonal portion of local
3076            submatrix  (same for all local rows)
3077 . d_nnz - array containing the number of nonzero blocks in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
3079            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3080            and set it even if it is zero.
3081 . o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3082            submatrix (same for all local rows).
3083 - o_nnz - array containing the number of nonzero blocks in the various block rows of the
3084            off-diagonal portion of the local submatrix (possibly different for
3085            each block row) or NULL.
3086 
3087   Output Parameter:
3088 . A - the matrix
3089 
3090   Options Database Keys:
3091 + -mat_block_size            - size of the blocks to use
3092 - -mat_use_hash_table <fact> - set hash table factor
3093 
3094   Level: intermediate
3095 
3096   Notes:
3097   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3098   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3099   [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
3100 
3101   For good matrix assembly performance
3102   the user should preallocate the matrix storage by setting the parameters
3103   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3104   performance can be increased by more than a factor of 50.
3105 
3106   If the *_nnz parameter is given then the *_nz parameter is ignored
3107 
  A nonzero block is any block that has 1 or more nonzeros in it
3109 
3110   The user MUST specify either the local or global matrix dimensions
3111   (possibly both).
3112 
3113   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
  then it must be used on all processors that share the object for that argument.
3115 
3116   Storage Information:
3117   For a square global matrix we define each processor's diagonal portion
3118   to be its local rows and the corresponding columns (a square submatrix);
3119   each processor's off-diagonal portion encompasses the remainder of the
3120   local matrix (a rectangular submatrix).
3121 
3122   The user can specify preallocated storage for the diagonal part of
3123   the local submatrix with either d_nz or d_nnz (not both).  Set
3124   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3125   memory allocation.  Likewise, specify preallocated storage for the
3126   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3127 
3128   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3129   the figure below we depict these three local rows and all columns (0-11).
3130 
3131 .vb
3132            0 1 2 3 4 5 6 7 8 9 10 11
3133           --------------------------
3134    row 3  |o o o d d d o o o o  o  o
3135    row 4  |o o o d d d o o o o  o  o
3136    row 5  |o o o d d d o o o o  o  o
3137           --------------------------
3138 .ve
3139 
3140   Thus, any entries in the d locations are stored in the d (diagonal)
3141   submatrix, and any entries in the o locations are stored in the
3142   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3143   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3144 
3145   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3146   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3147   In general, for PDE problems in which most nonzeros are near the diagonal,
3148   one expects `d_nz` >> `o_nz`.
3149 
3150 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
3151 @*/
3152 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
3153 {
3154   PetscMPIInt size;
3155 
3156   PetscFunctionBegin;
3157   PetscCall(MatCreate(comm, A));
3158   PetscCall(MatSetSizes(*A, m, n, M, N));
3159   PetscCallMPI(MPI_Comm_size(comm, &size));
3160   if (size > 1) {
3161     PetscCall(MatSetType(*A, MATMPIBAIJ));
3162     PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
3163   } else {
3164     PetscCall(MatSetType(*A, MATSEQBAIJ));
3165     PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
3166   }
3167   PetscFunctionReturn(PETSC_SUCCESS);
3168 }
3169 
/* Duplicate a MATMPIBAIJ matrix; numerical values of A and B are copied only as requested by cpvalues */
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat          mat;
  Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
  PetscInt     len = 0; /* number of off-diagonal block columns (length of garray) */

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));

  /* the duplicate is created already assembled and preallocated */
  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy block-size bookkeeping (block dimensions in units of bs) */
  a             = (Mat_MPIBAIJ *)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  /* share (reference-count) the original's row/column layouts instead of rebuilding them */
  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* MatGetRow() work arrays and the block scratch buffer are recreated lazily */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;
  a->barray       = NULL;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: keep the configuration flags/factor but not the table itself */
  a->ht           = NULL;
  a->hd           = NULL;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
  /* deep-copy the global-to-local column map; its representation depends on PETSC_USE_CTABLE */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
#endif
  } else a->colmap = NULL;

  /* copy the global block-column indices of the off-diagonal part */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
    PetscCall(PetscMalloc1(len, &a->garray));
    PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  /* fresh block stash; local work vector and scatter context are duplicated/copied */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
  PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
  PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));

  /* duplicate the diagonal (A) and off-diagonal (B) sequential blocks */
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
3245 
3246 /* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
  PetscInt    *rowidxs, *colidxs, rs, cs, ce;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1]; /* global number of rows */
  N  = header[2]; /* global number of columns */
  nz = header[3]; /* total nonzeros; a negative value flags a special on-disk format */
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set local sizes if not set already (square matrices only: mirror the other dimension) */
  if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
  if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetLocalSize(mat, &m, &n));
  PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
  mbs = m / bs; /* local number of block rows */
  nbs = n / bs; /* local number of block columns */

  /* read in row lengths and build row indices (prefix sum turns lengths into CSR offsets) */
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  /* sanity check: the row lengths over all ranks must account for every stored nonzero */
  PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));

  {                /* preallocate matrix storage */
    PetscBT    bt; /* helper bit set to count diagonal nonzeros */
    PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
    PetscBool  sbaij, done;
    PetscInt  *d_nnz, *o_nnz;

    PetscCall(PetscBTCreate(nbs, &bt));
    PetscCall(PetscHSetICreate(&ht));
    PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
    PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
    /* count distinct block columns per block row; for SBAIJ only the upper triangle (col >= row) is stored */
    for (i = 0; i < mbs; i++) {
      PetscCall(PetscBTMemzero(nbs, bt));
      PetscCall(PetscHSetIClear(ht));
      for (k = 0; k < bs; k++) {
        PetscInt row = bs * i + k;
        for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
          PetscInt col = colidxs[j];
          if (!sbaij || col >= row) {
            if (col >= cs && col < ce) {
              /* diagonal part: the bit set records which block columns were already counted */
              if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
            } else {
              /* off-diagonal part: the hash set deduplicates global block columns */
              PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
              if (done) o_nnz[i]++;
            }
          }
        }
      }
    }
    PetscCall(PetscBTDestroy(&bt));
    PetscCall(PetscHSetIDestroy(&ht));
    /* this loader serves both BAIJ and SBAIJ; each setter is type-dispatched
       (via PetscTryMethod), so the call not matching mat's type is a no-op */
    PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree2(d_nnz, o_nnz));
  }

  /* store matrix values, one local (non-blocked) row at a time */
  for (i = 0; i < m; i++) {
    PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
    PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES));
  }

  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3347 
3348 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
3349 {
3350   PetscBool isbinary;
3351 
3352   PetscFunctionBegin;
3353   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3354   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3355   PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
3356   PetscFunctionReturn(PETSC_SUCCESS);
3357 }
3358 
3359 /*@
3360   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table
3361 
3362   Input Parameters:
3363 + mat  - the matrix
3364 - fact - factor
3365 
3366   Options Database Key:
3367 . -mat_use_hash_table <fact> - provide the factor
3368 
3369   Level: advanced
3370 
3371 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()`
3372 @*/
PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact)
{
  PetscFunctionBegin;
  /* forward to the implementation composed under "MatSetHashTableFactor_C";
     a no-op for matrix types that do not provide one */
  PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3379 
3380 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact)
3381 {
3382   Mat_MPIBAIJ *baij;
3383 
3384   PetscFunctionBegin;
3385   baij          = (Mat_MPIBAIJ *)mat->data;
3386   baij->ht_fact = fact;
3387   PetscFunctionReturn(PETSC_SUCCESS);
3388 }
3389 
3390 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
3391 {
3392   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3393   PetscBool    flg;
3394 
3395   PetscFunctionBegin;
3396   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
3397   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
3398   if (Ad) *Ad = a->A;
3399   if (Ao) *Ao = a->B;
3400   if (colmap) *colmap = a->garray;
3401   PetscFunctionReturn(PETSC_SUCCESS);
3402 }
3403 
3404 /*
3405     Special version for direct calls from Fortran (to eliminate two function call overheads
    Special version for direct calls from Fortran (to eliminate two function call overheads)
3407 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3408   #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3409 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3410   #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3411 #endif
3412 
3413 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name
3414 /*@C
3415   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`
3416 
3417   Collective
3418 
3419   Input Parameters:
3420 + matin  - the matrix
3421 . min    - number of input rows
3422 . im     - input rows
3423 . nin    - number of input columns
3424 . in     - input columns
3425 . v      - numerical values input
3426 - addvin - `INSERT_VALUES` or `ADD_VALUES`
3427 
3428   Level: advanced
3429 
3430   Developer Notes:
3431   This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.
3432 
3433 .seealso: `Mat`, `MatSetValuesBlocked()`
3434 @*/
PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
{
  /* convert input arguments to C version */
  Mat        mat = *matin;
  PetscInt   m = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray; /* bs2-sized scratch block, kept on the matrix, used when a copy is needed */
  PetscBool        roworiented = baij->roworiented;
  PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));

  /* lazily allocate the bs x bs scratch block on first use */
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* stride (in scalars) to hop between consecutive rows/columns of one block inside v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative block rows are silently skipped */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) { /* block row owned by this process */
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2; /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend) { /* block column in the diagonal part */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue; /* negative block columns are silently skipped */
        } else { /* off-diagonal part: translate the global block column through colmap */
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
  #endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = (col - 1) / bs;
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            /* block column not yet present in the off-diagonal part: disassemble so
               the value can be inserted with its global index (unless nonew forbids it) */
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else { /* block row owned elsewhere: stash it for communication at assembly time */
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3542 
3543 /*@
3544   MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block
3545   CSR format the local rows.
3546 
3547   Collective
3548 
3549   Input Parameters:
3550 + comm - MPI communicator
3551 . bs   - the block size, only a block size of 1 is supported
3552 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
3553 . n    - This value should be the same as the local size used in creating the
3554        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
3555        calculated if N is given) For square matrices n is almost always m.
3556 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3557 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3558 . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
3559 . j    - column indices
3560 - a    - matrix values
3561 
3562   Output Parameter:
3563 . mat - the matrix
3564 
3565   Level: intermediate
3566 
3567   Notes:
3568   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
3569   thus you CANNOT change the matrix entries by changing the values of a[] after you have
3570   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3571 
3572   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3573   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3574   block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3575   with column-major ordering within blocks.
3576 
3577   The `i` and `j` indices are 0 based, and i indices are indices corresponding to the local `j` array.
3578 
3579 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
3580           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
3581 @*/
PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIBAIJ));
  PetscCall(MatSetBlockSize(*mat, bs));
  PetscCall(MatSetUp(*mat));
  /* the input blocks are column-major (see the note above), so switch to
     column-oriented insertion while copying the CSR data in */
  PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
  PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
  /* restore the default row-oriented insertion mode for subsequent MatSetValues() */
  PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3597 
3598 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3599 {
3600   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
3601   PetscInt    *indx;
3602   PetscScalar *values;
3603 
3604   PetscFunctionBegin;
3605   PetscCall(MatGetSize(inmat, &m, &N));
3606   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3607     Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data;
3608     PetscInt    *dnz, *onz, mbs, Nbs, nbs;
3609     PetscInt    *bindx, rmax = a->rmax, j;
3610     PetscMPIInt  rank, size;
3611 
3612     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3613     mbs = m / bs;
3614     Nbs = N / cbs;
3615     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
3616     nbs = n / cbs;
3617 
3618     PetscCall(PetscMalloc1(rmax, &bindx));
3619     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
3620 
3621     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3622     PetscCallMPI(MPI_Comm_rank(comm, &size));
3623     if (rank == size - 1) {
3624       /* Check sum(nbs) = Nbs */
3625       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
3626     }
3627 
3628     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
3629     for (i = 0; i < mbs; i++) {
3630       PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
3631       nnz = nnz / bs;
3632       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
3633       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
3634       PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL));
3635     }
3636     PetscCall(PetscFree(bindx));
3637 
3638     PetscCall(MatCreate(comm, outmat));
3639     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
3640     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
3641     PetscCall(MatSetType(*outmat, MATBAIJ));
3642     PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz));
3643     PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
3644     MatPreallocateEnd(dnz, onz);
3645     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3646   }
3647 
3648   /* numeric phase */
3649   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3650   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
3651 
3652   for (i = 0; i < m; i++) {
3653     PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3654     Ii = i + rstart;
3655     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
3656     PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3657   }
3658   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
3659   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
3660   PetscFunctionReturn(PETSC_SUCCESS);
3661 }
3662