xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision a4e35b1925eceef64945ea472b84f2bf06a67b5e)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I  "petscmat.h"  I*/
2 
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
8 {
9   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
10 
11   PetscFunctionBegin;
12   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
13   PetscCall(MatStashDestroy_Private(&mat->stash));
14   PetscCall(MatStashDestroy_Private(&mat->bstash));
15   PetscCall(MatDestroy(&baij->A));
16   PetscCall(MatDestroy(&baij->B));
17 #if defined(PETSC_USE_CTABLE)
18   PetscCall(PetscHMapIDestroy(&baij->colmap));
19 #else
20   PetscCall(PetscFree(baij->colmap));
21 #endif
22   PetscCall(PetscFree(baij->garray));
23   PetscCall(VecDestroy(&baij->lvec));
24   PetscCall(VecScatterDestroy(&baij->Mvctx));
25   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
26   PetscCall(PetscFree(baij->barray));
27   PetscCall(PetscFree2(baij->hd, baij->ht));
28   PetscCall(PetscFree(baij->rangebs));
29   PetscCall(PetscFree(mat->data));
30 
31   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
32   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
33   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
34   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
35   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
36   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
37   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
38   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
39   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
40   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
41 #if defined(PETSC_HAVE_HYPRE)
42   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
43 #endif
44   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
45   PetscFunctionReturn(PETSC_SUCCESS);
46 }
47 
48 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and  MatAssemblyEnd_MPI_Hash() */
49 #define TYPE BAIJ
50 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
51 #undef TYPE
52 
53 #if defined(PETSC_HAVE_HYPRE)
54 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
55 #endif
56 
57 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[])
58 {
59   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
60   PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
61   PetscScalar       *va, *vv;
62   Vec                vB, vA;
63   const PetscScalar *vb;
64 
65   PetscFunctionBegin;
66   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
67   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
68 
69   PetscCall(VecGetArrayWrite(vA, &va));
70   if (idx) {
71     for (i = 0; i < m; i++) {
72       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
73     }
74   }
75 
76   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
77   PetscCall(PetscMalloc1(m, &idxb));
78   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
79 
80   PetscCall(VecGetArrayWrite(v, &vv));
81   PetscCall(VecGetArrayRead(vB, &vb));
82   for (i = 0; i < m; i++) {
83     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
84       vv[i] = vb[i];
85       if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
86     } else {
87       vv[i] = va[i];
88       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
89     }
90   }
91   PetscCall(VecRestoreArrayWrite(vA, &vv));
92   PetscCall(VecRestoreArrayWrite(vA, &va));
93   PetscCall(VecRestoreArrayRead(vB, &vb));
94   PetscCall(PetscFree(idxb));
95   PetscCall(VecDestroy(&vA));
96   PetscCall(VecDestroy(&vB));
97   PetscFunctionReturn(PETSC_SUCCESS);
98 }
99 
100 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
101 {
102   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
103 
104   PetscFunctionBegin;
105   PetscCall(MatStoreValues(aij->A));
106   PetscCall(MatStoreValues(aij->B));
107   PetscFunctionReturn(PETSC_SUCCESS);
108 }
109 
110 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
111 {
112   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
113 
114   PetscFunctionBegin;
115   PetscCall(MatRetrieveValues(aij->A));
116   PetscCall(MatRetrieveValues(aij->B));
117   PetscFunctionReturn(PETSC_SUCCESS);
118 }
119 
120 /*
121      Local utility routine that creates a mapping from the global column
122    number to the local number in the off-diagonal part of the local
123    storage of the matrix.  This is done in a non scalable way since the
124    length of colmap equals the global matrix length.
125 */
126 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
127 {
128   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
129   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
130   PetscInt     nbs = B->nbs, i, bs = mat->rmap->bs;
131 
132   PetscFunctionBegin;
133 #if defined(PETSC_USE_CTABLE)
134   PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap));
135   for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1));
136 #else
137   PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
138   for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
139 #endif
140   PetscFunctionReturn(PETSC_SUCCESS);
141 }
142 
143 #define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
144   do { \
145     brow = row / bs; \
146     rp   = aj + ai[brow]; \
147     ap   = aa + bs2 * ai[brow]; \
148     rmax = aimax[brow]; \
149     nrow = ailen[brow]; \
150     bcol = col / bs; \
151     ridx = row % bs; \
152     cidx = col % bs; \
153     low  = 0; \
154     high = nrow; \
155     while (high - low > 3) { \
156       t = (low + high) / 2; \
157       if (rp[t] > bcol) high = t; \
158       else low = t; \
159     } \
160     for (_i = low; _i < high; _i++) { \
161       if (rp[_i] > bcol) break; \
162       if (rp[_i] == bcol) { \
163         bap = ap + bs2 * _i + bs * cidx + ridx; \
164         if (addv == ADD_VALUES) *bap += value; \
165         else *bap = value; \
166         goto a_noinsert; \
167       } \
168     } \
169     if (a->nonew == 1) goto a_noinsert; \
170     PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
171     MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
172     N = nrow++ - 1; \
173     /* shift up all the later entries in this row */ \
174     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
175     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
176     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
177     rp[_i]                          = bcol; \
178     ap[bs2 * _i + bs * cidx + ridx] = value; \
179   a_noinsert:; \
180     ailen[brow] = nrow; \
181   } while (0)
182 
183 #define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
184   do { \
185     brow = row / bs; \
186     rp   = bj + bi[brow]; \
187     ap   = ba + bs2 * bi[brow]; \
188     rmax = bimax[brow]; \
189     nrow = bilen[brow]; \
190     bcol = col / bs; \
191     ridx = row % bs; \
192     cidx = col % bs; \
193     low  = 0; \
194     high = nrow; \
195     while (high - low > 3) { \
196       t = (low + high) / 2; \
197       if (rp[t] > bcol) high = t; \
198       else low = t; \
199     } \
200     for (_i = low; _i < high; _i++) { \
201       if (rp[_i] > bcol) break; \
202       if (rp[_i] == bcol) { \
203         bap = ap + bs2 * _i + bs * cidx + ridx; \
204         if (addv == ADD_VALUES) *bap += value; \
205         else *bap = value; \
206         goto b_noinsert; \
207       } \
208     } \
209     if (b->nonew == 1) goto b_noinsert; \
210     PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column  (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
211     MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
212     N = nrow++ - 1; \
213     /* shift up all the later entries in this row */ \
214     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
215     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
216     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
217     rp[_i]                          = bcol; \
218     ap[bs2 * _i + bs * cidx + ridx] = value; \
219   b_noinsert:; \
220     bilen[brow] = nrow; \
221   } while (0)
222 
223 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
224 {
225   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
226   MatScalar    value;
227   PetscBool    roworiented = baij->roworiented;
228   PetscInt     i, j, row, col;
229   PetscInt     rstart_orig = mat->rmap->rstart;
230   PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
231   PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;
232 
233   /* Some Variables required in the macro */
234   Mat          A     = baij->A;
235   Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)(A)->data;
236   PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
237   MatScalar   *aa = a->a;
238 
239   Mat          B     = baij->B;
240   Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
241   PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
242   MatScalar   *ba = b->a;
243 
244   PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
245   PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
246   MatScalar *ap, *bap;
247 
248   PetscFunctionBegin;
249   for (i = 0; i < m; i++) {
250     if (im[i] < 0) continue;
251     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
252     if (im[i] >= rstart_orig && im[i] < rend_orig) {
253       row = im[i] - rstart_orig;
254       for (j = 0; j < n; j++) {
255         if (in[j] >= cstart_orig && in[j] < cend_orig) {
256           col = in[j] - cstart_orig;
257           if (roworiented) value = v[i * n + j];
258           else value = v[i + j * m];
259           MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
260         } else if (in[j] < 0) {
261           continue;
262         } else {
263           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
264           if (mat->was_assembled) {
265             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
266 #if defined(PETSC_USE_CTABLE)
267             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
268             col = col - 1;
269 #else
270             col = baij->colmap[in[j] / bs] - 1;
271 #endif
272             if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
273               PetscCall(MatDisAssemble_MPIBAIJ(mat));
274               col = in[j];
275               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
276               B     = baij->B;
277               b     = (Mat_SeqBAIJ *)(B)->data;
278               bimax = b->imax;
279               bi    = b->i;
280               bilen = b->ilen;
281               bj    = b->j;
282               ba    = b->a;
283             } else {
284               PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
285               col += in[j] % bs;
286             }
287           } else col = in[j];
288           if (roworiented) value = v[i * n + j];
289           else value = v[i + j * m];
290           MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
291           /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
292         }
293       }
294     } else {
295       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
296       if (!baij->donotstash) {
297         mat->assembled = PETSC_FALSE;
298         if (roworiented) {
299           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
300         } else {
301           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
302         }
303       }
304     }
305   }
306   PetscFunctionReturn(PETSC_SUCCESS);
307 }
308 
309 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
310 {
311   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
312   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
313   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
314   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
315   PetscBool          roworiented = a->roworiented;
316   const PetscScalar *value       = v;
317   MatScalar         *ap, *aa = a->a, *bap;
318 
319   PetscFunctionBegin;
320   rp    = aj + ai[row];
321   ap    = aa + bs2 * ai[row];
322   rmax  = imax[row];
323   nrow  = ailen[row];
324   value = v;
325   low   = 0;
326   high  = nrow;
327   while (high - low > 7) {
328     t = (low + high) / 2;
329     if (rp[t] > col) high = t;
330     else low = t;
331   }
332   for (i = low; i < high; i++) {
333     if (rp[i] > col) break;
334     if (rp[i] == col) {
335       bap = ap + bs2 * i;
336       if (roworiented) {
337         if (is == ADD_VALUES) {
338           for (ii = 0; ii < bs; ii++) {
339             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
340           }
341         } else {
342           for (ii = 0; ii < bs; ii++) {
343             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
344           }
345         }
346       } else {
347         if (is == ADD_VALUES) {
348           for (ii = 0; ii < bs; ii++, value += bs) {
349             for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
350             bap += bs;
351           }
352         } else {
353           for (ii = 0; ii < bs; ii++, value += bs) {
354             for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
355             bap += bs;
356           }
357         }
358       }
359       goto noinsert2;
360     }
361   }
362   if (nonew == 1) goto noinsert2;
363   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
364   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
365   N = nrow++ - 1;
366   high++;
367   /* shift up all the later entries in this row */
368   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
369   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
370   rp[i] = col;
371   bap   = ap + bs2 * i;
372   if (roworiented) {
373     for (ii = 0; ii < bs; ii++) {
374       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
375     }
376   } else {
377     for (ii = 0; ii < bs; ii++) {
378       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
379     }
380   }
381 noinsert2:;
382   ailen[row] = nrow;
383   PetscFunctionReturn(PETSC_SUCCESS);
384 }
385 
386 /*
387     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
388     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
389 */
390 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
391 {
392   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ *)mat->data;
393   const PetscScalar *value;
394   MatScalar         *barray      = baij->barray;
395   PetscBool          roworiented = baij->roworiented;
396   PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
397   PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
398   PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
399 
400   PetscFunctionBegin;
401   if (!barray) {
402     PetscCall(PetscMalloc1(bs2, &barray));
403     baij->barray = barray;
404   }
405 
406   if (roworiented) stepval = (n - 1) * bs;
407   else stepval = (m - 1) * bs;
408 
409   for (i = 0; i < m; i++) {
410     if (im[i] < 0) continue;
411     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
412     if (im[i] >= rstart && im[i] < rend) {
413       row = im[i] - rstart;
414       for (j = 0; j < n; j++) {
415         /* If NumCol = 1 then a copy is not required */
416         if ((roworiented) && (n == 1)) {
417           barray = (MatScalar *)v + i * bs2;
418         } else if ((!roworiented) && (m == 1)) {
419           barray = (MatScalar *)v + j * bs2;
420         } else { /* Here a copy is required */
421           if (roworiented) {
422             value = v + (i * (stepval + bs) + j) * bs;
423           } else {
424             value = v + (j * (stepval + bs) + i) * bs;
425           }
426           for (ii = 0; ii < bs; ii++, value += bs + stepval) {
427             for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
428             barray += bs;
429           }
430           barray -= bs2;
431         }
432 
433         if (in[j] >= cstart && in[j] < cend) {
434           col = in[j] - cstart;
435           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
436         } else if (in[j] < 0) {
437           continue;
438         } else {
439           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
440           if (mat->was_assembled) {
441             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
442 
443 #if defined(PETSC_USE_DEBUG)
444   #if defined(PETSC_USE_CTABLE)
445             {
446               PetscInt data;
447               PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
448               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
449             }
450   #else
451             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
452   #endif
453 #endif
454 #if defined(PETSC_USE_CTABLE)
455             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
456             col = (col - 1) / bs;
457 #else
458             col = (baij->colmap[in[j]] - 1) / bs;
459 #endif
460             if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
461               PetscCall(MatDisAssemble_MPIBAIJ(mat));
462               col = in[j];
463             } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
464           } else col = in[j];
465           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
466         }
467       }
468     } else {
469       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
470       if (!baij->donotstash) {
471         if (roworiented) {
472           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
473         } else {
474           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
475         }
476       }
477     }
478   }
479   PetscFunctionReturn(PETSC_SUCCESS);
480 }
481 
482 #define HASH_KEY             0.6180339887
483 #define HASH(size, key, tmp) (tmp = (key)*HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
484 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
485 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
486 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
487 {
488   Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
489   PetscBool    roworiented = baij->roworiented;
490   PetscInt     i, j, row, col;
491   PetscInt     rstart_orig = mat->rmap->rstart;
492   PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
493   PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
494   PetscReal    tmp;
495   MatScalar  **HD       = baij->hd, value;
496   PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;
497 
498   PetscFunctionBegin;
499   for (i = 0; i < m; i++) {
500     if (PetscDefined(USE_DEBUG)) {
501       PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
502       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
503     }
504     row = im[i];
505     if (row >= rstart_orig && row < rend_orig) {
506       for (j = 0; j < n; j++) {
507         col = in[j];
508         if (roworiented) value = v[i * n + j];
509         else value = v[i + j * m];
510         /* Look up PetscInto the Hash Table */
511         key = (row / bs) * Nbs + (col / bs) + 1;
512         h1  = HASH(size, key, tmp);
513 
514         idx = h1;
515         if (PetscDefined(USE_DEBUG)) {
516           insert_ct++;
517           total_ct++;
518           if (HT[idx] != key) {
519             for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
520               ;
521             if (idx == size) {
522               for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
523                 ;
524               PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
525             }
526           }
527         } else if (HT[idx] != key) {
528           for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
529             ;
530           if (idx == size) {
531             for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
532               ;
533             PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
534           }
535         }
536         /* A HASH table entry is found, so insert the values at the correct address */
537         if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
538         else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
539       }
540     } else if (!baij->donotstash) {
541       if (roworiented) {
542         PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
543       } else {
544         PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
545       }
546     }
547   }
548   if (PetscDefined(USE_DEBUG)) {
549     baij->ht_total_ct += total_ct;
550     baij->ht_insert_ct += insert_ct;
551   }
552   PetscFunctionReturn(PETSC_SUCCESS);
553 }
554 
555 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
556 {
557   Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
558   PetscBool          roworiented = baij->roworiented;
559   PetscInt           i, j, ii, jj, row, col;
560   PetscInt           rstart = baij->rstartbs;
561   PetscInt           rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
562   PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
563   PetscReal          tmp;
564   MatScalar        **HD = baij->hd, *baij_a;
565   const PetscScalar *v_t, *value;
566   PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;
567 
568   PetscFunctionBegin;
569   if (roworiented) stepval = (n - 1) * bs;
570   else stepval = (m - 1) * bs;
571 
572   for (i = 0; i < m; i++) {
573     if (PetscDefined(USE_DEBUG)) {
574       PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
575       PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
576     }
577     row = im[i];
578     v_t = v + i * nbs2;
579     if (row >= rstart && row < rend) {
580       for (j = 0; j < n; j++) {
581         col = in[j];
582 
583         /* Look up into the Hash Table */
584         key = row * Nbs + col + 1;
585         h1  = HASH(size, key, tmp);
586 
587         idx = h1;
588         if (PetscDefined(USE_DEBUG)) {
589           total_ct++;
590           insert_ct++;
591           if (HT[idx] != key) {
592             for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
593               ;
594             if (idx == size) {
595               for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
596                 ;
597               PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
598             }
599           }
600         } else if (HT[idx] != key) {
601           for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
602             ;
603           if (idx == size) {
604             for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
605               ;
606             PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
607           }
608         }
609         baij_a = HD[idx];
610         if (roworiented) {
611           /*value = v + i*(stepval+bs)*bs + j*bs;*/
612           /* value = v + (i*(stepval+bs)+j)*bs; */
613           value = v_t;
614           v_t += bs;
615           if (addv == ADD_VALUES) {
616             for (ii = 0; ii < bs; ii++, value += stepval) {
617               for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
618             }
619           } else {
620             for (ii = 0; ii < bs; ii++, value += stepval) {
621               for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
622             }
623           }
624         } else {
625           value = v + j * (stepval + bs) * bs + i * bs;
626           if (addv == ADD_VALUES) {
627             for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
628               for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
629             }
630           } else {
631             for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
632               for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
633             }
634           }
635         }
636       }
637     } else {
638       if (!baij->donotstash) {
639         if (roworiented) {
640           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
641         } else {
642           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
643         }
644       }
645     }
646   }
647   if (PetscDefined(USE_DEBUG)) {
648     baij->ht_total_ct += total_ct;
649     baij->ht_insert_ct += insert_ct;
650   }
651   PetscFunctionReturn(PETSC_SUCCESS);
652 }
653 
654 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
655 {
656   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
657   PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
658   PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;
659 
660   PetscFunctionBegin;
661   for (i = 0; i < m; i++) {
662     if (idxm[i] < 0) continue; /* negative row */
663     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
664     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
665       row = idxm[i] - bsrstart;
666       for (j = 0; j < n; j++) {
667         if (idxn[j] < 0) continue; /* negative column */
668         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
669         if (idxn[j] >= bscstart && idxn[j] < bscend) {
670           col = idxn[j] - bscstart;
671           PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
672         } else {
673           if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
674 #if defined(PETSC_USE_CTABLE)
675           PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
676           data--;
677 #else
678           data = baij->colmap[idxn[j] / bs] - 1;
679 #endif
680           if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
681           else {
682             col = data + idxn[j] % bs;
683             PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
684           }
685         }
686       }
687     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
688   }
689   PetscFunctionReturn(PETSC_SUCCESS);
690 }
691 
692 PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
693 {
694   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
695   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
696   PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
697   PetscReal    sum = 0.0;
698   MatScalar   *v;
699 
700   PetscFunctionBegin;
701   if (baij->size == 1) {
702     PetscCall(MatNorm(baij->A, type, nrm));
703   } else {
704     if (type == NORM_FROBENIUS) {
705       v  = amat->a;
706       nz = amat->nz * bs2;
707       for (i = 0; i < nz; i++) {
708         sum += PetscRealPart(PetscConj(*v) * (*v));
709         v++;
710       }
711       v  = bmat->a;
712       nz = bmat->nz * bs2;
713       for (i = 0; i < nz; i++) {
714         sum += PetscRealPart(PetscConj(*v) * (*v));
715         v++;
716       }
717       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
718       *nrm = PetscSqrtReal(*nrm);
719     } else if (type == NORM_1) { /* max column sum */
720       PetscReal *tmp, *tmp2;
721       PetscInt  *jj, *garray = baij->garray, cstart = baij->rstartbs;
722       PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
723       PetscCall(PetscMalloc1(mat->cmap->N, &tmp2));
724       v  = amat->a;
725       jj = amat->j;
726       for (i = 0; i < amat->nz; i++) {
727         for (j = 0; j < bs; j++) {
728           col = bs * (cstart + *jj) + j; /* column index */
729           for (row = 0; row < bs; row++) {
730             tmp[col] += PetscAbsScalar(*v);
731             v++;
732           }
733         }
734         jj++;
735       }
736       v  = bmat->a;
737       jj = bmat->j;
738       for (i = 0; i < bmat->nz; i++) {
739         for (j = 0; j < bs; j++) {
740           col = bs * garray[*jj] + j;
741           for (row = 0; row < bs; row++) {
742             tmp[col] += PetscAbsScalar(*v);
743             v++;
744           }
745         }
746         jj++;
747       }
748       PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
749       *nrm = 0.0;
750       for (j = 0; j < mat->cmap->N; j++) {
751         if (tmp2[j] > *nrm) *nrm = tmp2[j];
752       }
753       PetscCall(PetscFree(tmp));
754       PetscCall(PetscFree(tmp2));
755     } else if (type == NORM_INFINITY) { /* max row sum */
756       PetscReal *sums;
757       PetscCall(PetscMalloc1(bs, &sums));
758       sum = 0.0;
759       for (j = 0; j < amat->mbs; j++) {
760         for (row = 0; row < bs; row++) sums[row] = 0.0;
761         v  = amat->a + bs2 * amat->i[j];
762         nz = amat->i[j + 1] - amat->i[j];
763         for (i = 0; i < nz; i++) {
764           for (col = 0; col < bs; col++) {
765             for (row = 0; row < bs; row++) {
766               sums[row] += PetscAbsScalar(*v);
767               v++;
768             }
769           }
770         }
771         v  = bmat->a + bs2 * bmat->i[j];
772         nz = bmat->i[j + 1] - bmat->i[j];
773         for (i = 0; i < nz; i++) {
774           for (col = 0; col < bs; col++) {
775             for (row = 0; row < bs; row++) {
776               sums[row] += PetscAbsScalar(*v);
777               v++;
778             }
779           }
780         }
781         for (row = 0; row < bs; row++) {
782           if (sums[row] > sum) sum = sums[row];
783         }
784       }
785       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
786       PetscCall(PetscFree(sums));
787     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
788   }
789   PetscFunctionReturn(PETSC_SUCCESS);
790 }
791 
792 /*
793   Creates the hash table, and sets the table
794   This table is created only once.
795   If new entried need to be added to the matrix
796   then the hash table has to be destroyed and
797   recreated.
798 */
799 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
800 {
801   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
802   Mat          A = baij->A, B = baij->B;
803   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
804   PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
805   PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
806   PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
807   PetscInt    *HT, key;
808   MatScalar  **HD;
809   PetscReal    tmp;
810 #if defined(PETSC_USE_INFO)
811   PetscInt ct = 0, max = 0;
812 #endif
813 
814   PetscFunctionBegin;
815   if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS);
816 
817   baij->ht_size = (PetscInt)(factor * nz);
818   ht_size       = baij->ht_size;
819 
820   /* Allocate Memory for Hash Table */
821   PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
822   HD = baij->hd;
823   HT = baij->ht;
824 
825   /* Loop Over A */
826   for (i = 0; i < a->mbs; i++) {
827     for (j = ai[i]; j < ai[i + 1]; j++) {
828       row = i + rstart;
829       col = aj[j] + cstart;
830 
831       key = row * Nbs + col + 1;
832       h1  = HASH(ht_size, key, tmp);
833       for (k = 0; k < ht_size; k++) {
834         if (!HT[(h1 + k) % ht_size]) {
835           HT[(h1 + k) % ht_size] = key;
836           HD[(h1 + k) % ht_size] = a->a + j * bs2;
837           break;
838 #if defined(PETSC_USE_INFO)
839         } else {
840           ct++;
841 #endif
842         }
843       }
844 #if defined(PETSC_USE_INFO)
845       if (k > max) max = k;
846 #endif
847     }
848   }
849   /* Loop Over B */
850   for (i = 0; i < b->mbs; i++) {
851     for (j = bi[i]; j < bi[i + 1]; j++) {
852       row = i + rstart;
853       col = garray[bj[j]];
854       key = row * Nbs + col + 1;
855       h1  = HASH(ht_size, key, tmp);
856       for (k = 0; k < ht_size; k++) {
857         if (!HT[(h1 + k) % ht_size]) {
858           HT[(h1 + k) % ht_size] = key;
859           HD[(h1 + k) % ht_size] = b->a + j * bs2;
860           break;
861 #if defined(PETSC_USE_INFO)
862         } else {
863           ct++;
864 #endif
865         }
866       }
867 #if defined(PETSC_USE_INFO)
868       if (k > max) max = k;
869 #endif
870     }
871   }
872 
873   /* Print Summary */
874 #if defined(PETSC_USE_INFO)
875   for (i = 0, j = 0; i < ht_size; i++) {
876     if (HT[i]) j++;
877   }
878   PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max));
879 #endif
880   PetscFunctionReturn(PETSC_SUCCESS);
881 }
882 
883 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
884 {
885   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
886   PetscInt     nstash, reallocs;
887 
888   PetscFunctionBegin;
889   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
890 
891   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
892   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
893   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
894   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
895   PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
896   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
897   PetscFunctionReturn(PETSC_SUCCESS);
898 }
899 
900 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
901 {
902   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
903   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
904   PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
905   PetscInt    *row, *col;
906   PetscBool    r1, r2, r3, other_disassembled;
907   MatScalar   *val;
908   PetscMPIInt  n;
909 
910   PetscFunctionBegin;
911   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
912   if (!baij->donotstash && !mat->nooffprocentries) {
913     while (1) {
914       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
915       if (!flg) break;
916 
917       for (i = 0; i < n;) {
918         /* Now identify the consecutive vals belonging to the same row */
919         for (j = i, rstart = row[j]; j < n; j++) {
920           if (row[j] != rstart) break;
921         }
922         if (j < n) ncols = j - i;
923         else ncols = n - i;
924         /* Now assemble all these values with a single function call */
925         PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
926         i = j;
927       }
928     }
929     PetscCall(MatStashScatterEnd_Private(&mat->stash));
930     /* Now process the block-stash. Since the values are stashed column-oriented,
931        set the roworiented flag to column oriented, and after MatSetValues()
932        restore the original flags */
933     r1 = baij->roworiented;
934     r2 = a->roworiented;
935     r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;
936 
937     baij->roworiented = PETSC_FALSE;
938     a->roworiented    = PETSC_FALSE;
939 
940     (((Mat_SeqBAIJ *)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
941     while (1) {
942       PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
943       if (!flg) break;
944 
945       for (i = 0; i < n;) {
946         /* Now identify the consecutive vals belonging to the same row */
947         for (j = i, rstart = row[j]; j < n; j++) {
948           if (row[j] != rstart) break;
949         }
950         if (j < n) ncols = j - i;
951         else ncols = n - i;
952         PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
953         i = j;
954       }
955     }
956     PetscCall(MatStashScatterEnd_Private(&mat->bstash));
957 
958     baij->roworiented = r1;
959     a->roworiented    = r2;
960 
961     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
962   }
963 
964   PetscCall(MatAssemblyBegin(baij->A, mode));
965   PetscCall(MatAssemblyEnd(baij->A, mode));
966 
967   /* determine if any processor has disassembled, if so we must
968      also disassemble ourselves, in order that we may reassemble. */
969   /*
970      if nonzero structure of submatrix B cannot change then we know that
971      no processor disassembled thus we can skip this stuff
972   */
973   if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
974     PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
975     if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
976   }
977 
978   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
979   PetscCall(MatAssemblyBegin(baij->B, mode));
980   PetscCall(MatAssemblyEnd(baij->B, mode));
981 
982 #if defined(PETSC_USE_INFO)
983   if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
984     PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));
985 
986     baij->ht_total_ct  = 0;
987     baij->ht_insert_ct = 0;
988   }
989 #endif
990   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
991     PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));
992 
993     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
994     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
995   }
996 
997   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
998 
999   baij->rowvalues = NULL;
1000 
1001   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
1002   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
1003     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
1004     PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
1005   }
1006   PetscFunctionReturn(PETSC_SUCCESS);
1007 }
1008 
1009 extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer);
1010 #include <petscdraw.h>
1011 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1012 {
1013   Mat_MPIBAIJ      *baij = (Mat_MPIBAIJ *)mat->data;
1014   PetscMPIInt       rank = baij->rank;
1015   PetscInt          bs   = mat->rmap->bs;
1016   PetscBool         iascii, isdraw;
1017   PetscViewer       sviewer;
1018   PetscViewerFormat format;
1019 
1020   PetscFunctionBegin;
1021   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1022   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1023   if (iascii) {
1024     PetscCall(PetscViewerGetFormat(viewer, &format));
1025     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1026       MatInfo info;
1027       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1028       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
1029       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1030       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1031                                                    mat->rmap->bs, (double)info.memory));
1032       PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
1033       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1034       PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
1035       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1036       PetscCall(PetscViewerFlush(viewer));
1037       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1038       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
1039       PetscCall(VecScatterView(baij->Mvctx, viewer));
1040       PetscFunctionReturn(PETSC_SUCCESS);
1041     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1042       PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
1043       PetscFunctionReturn(PETSC_SUCCESS);
1044     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1045       PetscFunctionReturn(PETSC_SUCCESS);
1046     }
1047   }
1048 
1049   if (isdraw) {
1050     PetscDraw draw;
1051     PetscBool isnull;
1052     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1053     PetscCall(PetscDrawIsNull(draw, &isnull));
1054     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1055   }
1056 
1057   {
1058     /* assemble the entire matrix onto first processor. */
1059     Mat          A;
1060     Mat_SeqBAIJ *Aloc;
1061     PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
1062     MatScalar   *a;
1063     const char  *matname;
1064 
1065     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1066     /* Perhaps this should be the type of mat? */
1067     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
1068     if (rank == 0) {
1069       PetscCall(MatSetSizes(A, M, N, M, N));
1070     } else {
1071       PetscCall(MatSetSizes(A, 0, 0, M, N));
1072     }
1073     PetscCall(MatSetType(A, MATMPIBAIJ));
1074     PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
1075     PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
1076 
1077     /* copy over the A part */
1078     Aloc = (Mat_SeqBAIJ *)baij->A->data;
1079     ai   = Aloc->i;
1080     aj   = Aloc->j;
1081     a    = Aloc->a;
1082     PetscCall(PetscMalloc1(bs, &rvals));
1083 
1084     for (i = 0; i < mbs; i++) {
1085       rvals[0] = bs * (baij->rstartbs + i);
1086       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1087       for (j = ai[i]; j < ai[i + 1]; j++) {
1088         col = (baij->cstartbs + aj[j]) * bs;
1089         for (k = 0; k < bs; k++) {
1090           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
1091           col++;
1092           a += bs;
1093         }
1094       }
1095     }
1096     /* copy over the B part */
1097     Aloc = (Mat_SeqBAIJ *)baij->B->data;
1098     ai   = Aloc->i;
1099     aj   = Aloc->j;
1100     a    = Aloc->a;
1101     for (i = 0; i < mbs; i++) {
1102       rvals[0] = bs * (baij->rstartbs + i);
1103       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1104       for (j = ai[i]; j < ai[i + 1]; j++) {
1105         col = baij->garray[aj[j]] * bs;
1106         for (k = 0; k < bs; k++) {
1107           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
1108           col++;
1109           a += bs;
1110         }
1111       }
1112     }
1113     PetscCall(PetscFree(rvals));
1114     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1115     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1116     /*
1117        Everyone has to call to draw the matrix since the graphics waits are
1118        synchronized across all processors that share the PetscDraw object
1119     */
1120     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1121     if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
1122     if (rank == 0) {
1123       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)(A->data))->A, matname));
1124       PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)(A->data))->A, sviewer));
1125     }
1126     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1127     PetscCall(PetscViewerFlush(viewer));
1128     PetscCall(MatDestroy(&A));
1129   }
1130   PetscFunctionReturn(PETSC_SUCCESS);
1131 }
1132 
1133 /* Used for both MPIBAIJ and MPISBAIJ matrices */
1134 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
1135 {
1136   Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
1137   Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
1138   Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
1139   const PetscInt *garray = aij->garray;
1140   PetscInt        header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l;
1141   PetscInt64      nz, hnz;
1142   PetscInt       *rowlens, *colidxs;
1143   PetscScalar    *matvals;
1144   PetscMPIInt     rank;
1145 
1146   PetscFunctionBegin;
1147   PetscCall(PetscViewerSetUp(viewer));
1148 
1149   M  = mat->rmap->N;
1150   N  = mat->cmap->N;
1151   m  = mat->rmap->n;
1152   rs = mat->rmap->rstart;
1153   cs = mat->cmap->rstart;
1154   bs = mat->rmap->bs;
1155   nz = bs * bs * (A->nz + B->nz);
1156 
1157   /* write matrix header */
1158   header[0] = MAT_FILE_CLASSID;
1159   header[1] = M;
1160   header[2] = N;
1161   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1162   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1163   if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
1164   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1165 
1166   /* fill in and store row lengths */
1167   PetscCall(PetscMalloc1(m, &rowlens));
1168   for (cnt = 0, i = 0; i < A->mbs; i++)
1169     for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
1170   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1171   PetscCall(PetscFree(rowlens));
1172 
1173   /* fill in and store column indices */
1174   PetscCall(PetscMalloc1(nz, &colidxs));
1175   for (cnt = 0, i = 0; i < A->mbs; i++) {
1176     for (k = 0; k < bs; k++) {
1177       for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1178         if (garray[B->j[jb]] > cs / bs) break;
1179         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
1180       }
1181       for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
1182         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
1183       for (; jb < B->i[i + 1]; jb++)
1184         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
1185     }
1186   }
1187   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1188   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
1189   PetscCall(PetscFree(colidxs));
1190 
1191   /* fill in and store nonzero values */
1192   PetscCall(PetscMalloc1(nz, &matvals));
1193   for (cnt = 0, i = 0; i < A->mbs; i++) {
1194     for (k = 0; k < bs; k++) {
1195       for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1196         if (garray[B->j[jb]] > cs / bs) break;
1197         for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
1198       }
1199       for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
1200         for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
1201       for (; jb < B->i[i + 1]; jb++)
1202         for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
1203     }
1204   }
1205   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
1206   PetscCall(PetscFree(matvals));
1207 
1208   /* write block size option to the viewer's .info file */
1209   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1210   PetscFunctionReturn(PETSC_SUCCESS);
1211 }
1212 
1213 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
1214 {
1215   PetscBool iascii, isdraw, issocket, isbinary;
1216 
1217   PetscFunctionBegin;
1218   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1219   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1220   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1221   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1222   if (iascii || isdraw || issocket) {
1223     PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
1224   } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
1225   PetscFunctionReturn(PETSC_SUCCESS);
1226 }
1227 
1228 PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
1229 {
1230   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1231   PetscInt     nt;
1232 
1233   PetscFunctionBegin;
1234   PetscCall(VecGetLocalSize(xx, &nt));
1235   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
1236   PetscCall(VecGetLocalSize(yy, &nt));
1237   PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy");
1238   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1239   PetscCall((*a->A->ops->mult)(a->A, xx, yy));
1240   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1241   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
1242   PetscFunctionReturn(PETSC_SUCCESS);
1243 }
1244 
1245 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1246 {
1247   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1248 
1249   PetscFunctionBegin;
1250   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1251   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1252   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1253   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1254   PetscFunctionReturn(PETSC_SUCCESS);
1255 }
1256 
1257 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
1258 {
1259   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1260 
1261   PetscFunctionBegin;
1262   /* do nondiagonal part */
1263   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1264   /* do local part */
1265   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1266   /* add partial results together */
1267   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1268   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1269   PetscFunctionReturn(PETSC_SUCCESS);
1270 }
1271 
1272 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1273 {
1274   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1275 
1276   PetscFunctionBegin;
1277   /* do nondiagonal part */
1278   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1279   /* do local part */
1280   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1281   /* add partial results together */
1282   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1283   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1284   PetscFunctionReturn(PETSC_SUCCESS);
1285 }
1286 
1287 /*
1288   This only works correctly for square matrices where the subblock A->A is the
1289    diagonal block
1290 */
1291 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
1292 {
1293   PetscFunctionBegin;
1294   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1295   PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
1296   PetscFunctionReturn(PETSC_SUCCESS);
1297 }
1298 
1299 PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
1300 {
1301   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1302 
1303   PetscFunctionBegin;
1304   PetscCall(MatScale(a->A, aa));
1305   PetscCall(MatScale(a->B, aa));
1306   PetscFunctionReturn(PETSC_SUCCESS);
1307 }
1308 
1309 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1310 {
1311   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
1312   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1313   PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
1314   PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
1315   PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;
1316 
1317   PetscFunctionBegin;
1318   PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
1319   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1320   mat->getrowactive = PETSC_TRUE;
1321 
1322   if (!mat->rowvalues && (idx || v)) {
1323     /*
1324         allocate enough space to hold information from the longest row.
1325     */
1326     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
1327     PetscInt     max = 1, mbs = mat->mbs, tmp;
1328     for (i = 0; i < mbs; i++) {
1329       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1330       if (max < tmp) max = tmp;
1331     }
1332     PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
1333   }
1334   lrow = row - brstart;
1335 
1336   pvA = &vworkA;
1337   pcA = &cworkA;
1338   pvB = &vworkB;
1339   pcB = &cworkB;
1340   if (!v) {
1341     pvA = NULL;
1342     pvB = NULL;
1343   }
1344   if (!idx) {
1345     pcA = NULL;
1346     if (!v) pcB = NULL;
1347   }
1348   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1349   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1350   nztot = nzA + nzB;
1351 
1352   cmap = mat->garray;
1353   if (v || idx) {
1354     if (nztot) {
1355       /* Sort by increasing column numbers, assuming A and B already sorted */
1356       PetscInt imark = -1;
1357       if (v) {
1358         *v = v_p = mat->rowvalues;
1359         for (i = 0; i < nzB; i++) {
1360           if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
1361           else break;
1362         }
1363         imark = i;
1364         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1365         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1366       }
1367       if (idx) {
1368         *idx = idx_p = mat->rowindices;
1369         if (imark > -1) {
1370           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1371         } else {
1372           for (i = 0; i < nzB; i++) {
1373             if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1374             else break;
1375           }
1376           imark = i;
1377         }
1378         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
1379         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1380       }
1381     } else {
1382       if (idx) *idx = NULL;
1383       if (v) *v = NULL;
1384     }
1385   }
1386   *nz = nztot;
1387   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1388   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1389   PetscFunctionReturn(PETSC_SUCCESS);
1390 }
1391 
1392 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1393 {
1394   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1395 
1396   PetscFunctionBegin;
1397   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
1398   baij->getrowactive = PETSC_FALSE;
1399   PetscFunctionReturn(PETSC_SUCCESS);
1400 }
1401 
1402 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1403 {
1404   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1405 
1406   PetscFunctionBegin;
1407   PetscCall(MatZeroEntries(l->A));
1408   PetscCall(MatZeroEntries(l->B));
1409   PetscFunctionReturn(PETSC_SUCCESS);
1410 }
1411 
1412 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1413 {
1414   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
1415   Mat            A = a->A, B = a->B;
1416   PetscLogDouble isend[5], irecv[5];
1417 
1418   PetscFunctionBegin;
1419   info->block_size = (PetscReal)matin->rmap->bs;
1420 
1421   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1422 
1423   isend[0] = info->nz_used;
1424   isend[1] = info->nz_allocated;
1425   isend[2] = info->nz_unneeded;
1426   isend[3] = info->memory;
1427   isend[4] = info->mallocs;
1428 
1429   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1430 
1431   isend[0] += info->nz_used;
1432   isend[1] += info->nz_allocated;
1433   isend[2] += info->nz_unneeded;
1434   isend[3] += info->memory;
1435   isend[4] += info->mallocs;
1436 
1437   if (flag == MAT_LOCAL) {
1438     info->nz_used      = isend[0];
1439     info->nz_allocated = isend[1];
1440     info->nz_unneeded  = isend[2];
1441     info->memory       = isend[3];
1442     info->mallocs      = isend[4];
1443   } else if (flag == MAT_GLOBAL_MAX) {
1444     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1445 
1446     info->nz_used      = irecv[0];
1447     info->nz_allocated = irecv[1];
1448     info->nz_unneeded  = irecv[2];
1449     info->memory       = irecv[3];
1450     info->mallocs      = irecv[4];
1451   } else if (flag == MAT_GLOBAL_SUM) {
1452     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1453 
1454     info->nz_used      = irecv[0];
1455     info->nz_allocated = irecv[1];
1456     info->nz_unneeded  = irecv[2];
1457     info->memory       = irecv[3];
1458     info->mallocs      = irecv[4];
1459   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1460   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1461   info->fill_ratio_needed = 0;
1462   info->factor_mallocs    = 0;
1463   PetscFunctionReturn(PETSC_SUCCESS);
1464 }
1465 
1466 PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
1467 {
1468   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1469 
1470   PetscFunctionBegin;
1471   switch (op) {
1472   case MAT_NEW_NONZERO_LOCATIONS:
1473   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1474   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1475   case MAT_KEEP_NONZERO_PATTERN:
1476   case MAT_NEW_NONZERO_LOCATION_ERR:
1477     MatCheckPreallocated(A, 1);
1478     PetscCall(MatSetOption(a->A, op, flg));
1479     PetscCall(MatSetOption(a->B, op, flg));
1480     break;
1481   case MAT_ROW_ORIENTED:
1482     MatCheckPreallocated(A, 1);
1483     a->roworiented = flg;
1484 
1485     PetscCall(MatSetOption(a->A, op, flg));
1486     PetscCall(MatSetOption(a->B, op, flg));
1487     break;
1488   case MAT_FORCE_DIAGONAL_ENTRIES:
1489   case MAT_SORTED_FULL:
1490     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1491     break;
1492   case MAT_IGNORE_OFF_PROC_ENTRIES:
1493     a->donotstash = flg;
1494     break;
1495   case MAT_USE_HASH_TABLE:
1496     a->ht_flag = flg;
1497     a->ht_fact = 1.39;
1498     break;
1499   case MAT_SYMMETRIC:
1500   case MAT_STRUCTURALLY_SYMMETRIC:
1501   case MAT_HERMITIAN:
1502   case MAT_SUBMAT_SINGLEIS:
1503   case MAT_SYMMETRY_ETERNAL:
1504   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1505   case MAT_SPD_ETERNAL:
1506     /* if the diagonal matrix is square it inherits some of the properties above */
1507     break;
1508   default:
1509     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op);
1510   }
1511   PetscFunctionReturn(PETSC_SUCCESS);
1512 }
1513 
1514 PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout)
1515 {
1516   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
1517   Mat_SeqBAIJ *Aloc;
1518   Mat          B;
1519   PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
1520   PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
1521   MatScalar   *a;
1522 
1523   PetscFunctionBegin;
1524   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1525   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1526     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1527     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1528     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1529     /* Do not know preallocation information, but must set block size */
1530     PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
1531   } else {
1532     B = *matout;
1533   }
1534 
1535   /* copy over the A part */
1536   Aloc = (Mat_SeqBAIJ *)baij->A->data;
1537   ai   = Aloc->i;
1538   aj   = Aloc->j;
1539   a    = Aloc->a;
1540   PetscCall(PetscMalloc1(bs, &rvals));
1541 
1542   for (i = 0; i < mbs; i++) {
1543     rvals[0] = bs * (baij->rstartbs + i);
1544     for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1545     for (j = ai[i]; j < ai[i + 1]; j++) {
1546       col = (baij->cstartbs + aj[j]) * bs;
1547       for (k = 0; k < bs; k++) {
1548         PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
1549 
1550         col++;
1551         a += bs;
1552       }
1553     }
1554   }
1555   /* copy over the B part */
1556   Aloc = (Mat_SeqBAIJ *)baij->B->data;
1557   ai   = Aloc->i;
1558   aj   = Aloc->j;
1559   a    = Aloc->a;
1560   for (i = 0; i < mbs; i++) {
1561     rvals[0] = bs * (baij->rstartbs + i);
1562     for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1563     for (j = ai[i]; j < ai[i + 1]; j++) {
1564       col = baij->garray[aj[j]] * bs;
1565       for (k = 0; k < bs; k++) {
1566         PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
1567         col++;
1568         a += bs;
1569       }
1570     }
1571   }
1572   PetscCall(PetscFree(rvals));
1573   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1574   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1575 
1576   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
1577   else PetscCall(MatHeaderMerge(A, &B));
1578   PetscFunctionReturn(PETSC_SUCCESS);
1579 }
1580 
1581 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr)
1582 {
1583   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1584   Mat          a = baij->A, b = baij->B;
1585   PetscInt     s1, s2, s3;
1586 
1587   PetscFunctionBegin;
1588   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1589   if (rr) {
1590     PetscCall(VecGetLocalSize(rr, &s1));
1591     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1592     /* Overlap communication with computation. */
1593     PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1594   }
1595   if (ll) {
1596     PetscCall(VecGetLocalSize(ll, &s1));
1597     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1598     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1599   }
1600   /* scale  the diagonal block */
1601   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1602 
1603   if (rr) {
1604     /* Do a scatter end and then right scale the off-diagonal block */
1605     PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1606     PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1607   }
1608   PetscFunctionReturn(PETSC_SUCCESS);
1609 }
1610 
1611 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
1612 {
1613   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1614   PetscInt    *lrows;
1615   PetscInt     r, len;
1616   PetscBool    cong;
1617 
1618   PetscFunctionBegin;
1619   /* get locally owned rows */
1620   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
1621   /* fix right hand side if needed */
1622   if (x && b) {
1623     const PetscScalar *xx;
1624     PetscScalar       *bb;
1625 
1626     PetscCall(VecGetArrayRead(x, &xx));
1627     PetscCall(VecGetArray(b, &bb));
1628     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
1629     PetscCall(VecRestoreArrayRead(x, &xx));
1630     PetscCall(VecRestoreArray(b, &bb));
1631   }
1632 
1633   /* actually zap the local rows */
1634   /*
1635         Zero the required rows. If the "diagonal block" of the matrix
1636      is square and the user wishes to set the diagonal we use separate
1637      code so that MatSetValues() is not called for each diagonal allocating
1638      new memory, thus calling lots of mallocs and slowing things down.
1639 
1640   */
1641   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1642   PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
1643   PetscCall(MatHasCongruentLayouts(A, &cong));
1644   if ((diag != 0.0) && cong) {
1645     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
1646   } else if (diag != 0.0) {
1647     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
1648     PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1649        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1650     for (r = 0; r < len; ++r) {
1651       const PetscInt row = lrows[r] + A->rmap->rstart;
1652       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
1653     }
1654     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1655     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1656   } else {
1657     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
1658   }
1659   PetscCall(PetscFree(lrows));
1660 
1661   /* only change matrix nonzero state if pattern was allowed to be changed */
1662   if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) {
1663     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1664     PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1665   }
1666   PetscFunctionReturn(PETSC_SUCCESS);
1667 }
1668 
1669 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
1670 {
1671   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
1672   PetscMPIInt        n = A->rmap->n, p = 0;
1673   PetscInt           i, j, k, r, len = 0, row, col, count;
1674   PetscInt          *lrows, *owners = A->rmap->range;
1675   PetscSFNode       *rrows;
1676   PetscSF            sf;
1677   const PetscScalar *xx;
1678   PetscScalar       *bb, *mask;
1679   Vec                xmask, lmask;
1680   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
1681   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1682   PetscScalar       *aa;
1683 
1684   PetscFunctionBegin;
1685   /* Create SF where leaves are input rows and roots are owned rows */
1686   PetscCall(PetscMalloc1(n, &lrows));
1687   for (r = 0; r < n; ++r) lrows[r] = -1;
1688   PetscCall(PetscMalloc1(N, &rrows));
1689   for (r = 0; r < N; ++r) {
1690     const PetscInt idx = rows[r];
1691     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
1692     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
1693       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
1694     }
1695     rrows[r].rank  = p;
1696     rrows[r].index = rows[r] - owners[p];
1697   }
1698   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1699   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
1700   /* Collect flags for rows to be zeroed */
1701   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
1702   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
1703   PetscCall(PetscSFDestroy(&sf));
1704   /* Compress and put in row numbers */
1705   for (r = 0; r < n; ++r)
1706     if (lrows[r] >= 0) lrows[len++] = r;
1707   /* zero diagonal part of matrix */
1708   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
1709   /* handle off diagonal part of matrix */
1710   PetscCall(MatCreateVecs(A, &xmask, NULL));
1711   PetscCall(VecDuplicate(l->lvec, &lmask));
1712   PetscCall(VecGetArray(xmask, &bb));
1713   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
1714   PetscCall(VecRestoreArray(xmask, &bb));
1715   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
1716   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
1717   PetscCall(VecDestroy(&xmask));
1718   if (x) {
1719     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
1720     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
1721     PetscCall(VecGetArrayRead(l->lvec, &xx));
1722     PetscCall(VecGetArray(b, &bb));
1723   }
1724   PetscCall(VecGetArray(lmask, &mask));
1725   /* remove zeroed rows of off diagonal matrix */
1726   for (i = 0; i < len; ++i) {
1727     row   = lrows[i];
1728     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
1729     aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
1730     for (k = 0; k < count; ++k) {
1731       aa[0] = 0.0;
1732       aa += bs;
1733     }
1734   }
1735   /* loop over all elements of off process part of matrix zeroing removed columns*/
1736   for (i = 0; i < l->B->rmap->N; ++i) {
1737     row = i / bs;
1738     for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
1739       for (k = 0; k < bs; ++k) {
1740         col = bs * baij->j[j] + k;
1741         if (PetscAbsScalar(mask[col])) {
1742           aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k;
1743           if (x) bb[i] -= aa[0] * xx[col];
1744           aa[0] = 0.0;
1745         }
1746       }
1747     }
1748   }
1749   if (x) {
1750     PetscCall(VecRestoreArray(b, &bb));
1751     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1752   }
1753   PetscCall(VecRestoreArray(lmask, &mask));
1754   PetscCall(VecDestroy(&lmask));
1755   PetscCall(PetscFree(lrows));
1756 
1757   /* only change matrix nonzero state if pattern was allowed to be changed */
1758   if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) {
1759     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1760     PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1761   }
1762   PetscFunctionReturn(PETSC_SUCCESS);
1763 }
1764 
1765 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1766 {
1767   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1768 
1769   PetscFunctionBegin;
1770   PetscCall(MatSetUnfactored(a->A));
1771   PetscFunctionReturn(PETSC_SUCCESS);
1772 }
1773 
1774 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);
1775 
1776 PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag)
1777 {
1778   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
1779   Mat          a, b, c, d;
1780   PetscBool    flg;
1781 
1782   PetscFunctionBegin;
1783   a = matA->A;
1784   b = matA->B;
1785   c = matB->A;
1786   d = matB->B;
1787 
1788   PetscCall(MatEqual(a, c, &flg));
1789   if (flg) PetscCall(MatEqual(b, d, &flg));
1790   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1791   PetscFunctionReturn(PETSC_SUCCESS);
1792 }
1793 
1794 PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str)
1795 {
1796   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1797   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
1798 
1799   PetscFunctionBegin;
1800   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1801   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1802     PetscCall(MatCopy_Basic(A, B, str));
1803   } else {
1804     PetscCall(MatCopy(a->A, b->A, str));
1805     PetscCall(MatCopy(a->B, b->B, str));
1806   }
1807   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1808   PetscFunctionReturn(PETSC_SUCCESS);
1809 }
1810 
1811 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
1812 {
1813   PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs;
1814   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
1815   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
1816 
1817   PetscFunctionBegin;
1818   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
1819   PetscFunctionReturn(PETSC_SUCCESS);
1820 }
1821 
1822 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
1823 {
1824   Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
1825   PetscBLASInt bnz, one                         = 1;
1826   Mat_SeqBAIJ *x, *y;
1827   PetscInt     bs2 = Y->rmap->bs * Y->rmap->bs;
1828 
1829   PetscFunctionBegin;
1830   if (str == SAME_NONZERO_PATTERN) {
1831     PetscScalar alpha = a;
1832     x                 = (Mat_SeqBAIJ *)xx->A->data;
1833     y                 = (Mat_SeqBAIJ *)yy->A->data;
1834     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
1835     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
1836     x = (Mat_SeqBAIJ *)xx->B->data;
1837     y = (Mat_SeqBAIJ *)yy->B->data;
1838     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
1839     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
1840     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1841   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1842     PetscCall(MatAXPY_Basic(Y, a, X, str));
1843   } else {
1844     Mat       B;
1845     PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
1846     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
1847     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
1848     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
1849     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
1850     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
1851     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
1852     PetscCall(MatSetType(B, MATMPIBAIJ));
1853     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d));
1854     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
1855     PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
1856     /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
1857     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
1858     PetscCall(MatHeaderMerge(Y, &B));
1859     PetscCall(PetscFree(nnz_d));
1860     PetscCall(PetscFree(nnz_o));
1861   }
1862   PetscFunctionReturn(PETSC_SUCCESS);
1863 }
1864 
1865 PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
1866 {
1867   PetscFunctionBegin;
1868   if (PetscDefined(USE_COMPLEX)) {
1869     Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;
1870 
1871     PetscCall(MatConjugate_SeqBAIJ(a->A));
1872     PetscCall(MatConjugate_SeqBAIJ(a->B));
1873   }
1874   PetscFunctionReturn(PETSC_SUCCESS);
1875 }
1876 
1877 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1878 {
1879   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1880 
1881   PetscFunctionBegin;
1882   PetscCall(MatRealPart(a->A));
1883   PetscCall(MatRealPart(a->B));
1884   PetscFunctionReturn(PETSC_SUCCESS);
1885 }
1886 
1887 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1888 {
1889   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1890 
1891   PetscFunctionBegin;
1892   PetscCall(MatImaginaryPart(a->A));
1893   PetscCall(MatImaginaryPart(a->B));
1894   PetscFunctionReturn(PETSC_SUCCESS);
1895 }
1896 
1897 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1898 {
1899   IS       iscol_local;
1900   PetscInt csize;
1901 
1902   PetscFunctionBegin;
1903   PetscCall(ISGetLocalSize(iscol, &csize));
1904   if (call == MAT_REUSE_MATRIX) {
1905     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1906     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1907   } else {
1908     PetscCall(ISAllGather(iscol, &iscol_local));
1909   }
1910   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat));
1911   if (call == MAT_INITIAL_MATRIX) {
1912     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1913     PetscCall(ISDestroy(&iscol_local));
1914   }
1915   PetscFunctionReturn(PETSC_SUCCESS);
1916 }
1917 
1918 /*
1919   Not great since it makes two copies of the submatrix, first an SeqBAIJ
1920   in local and then by concatenating the local matrices the end result.
1921   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1922   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
1923 */
1924 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
1925 {
1926   PetscMPIInt  rank, size;
1927   PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
1928   PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
1929   Mat          M, Mreuse;
1930   MatScalar   *vwork, *aa;
1931   MPI_Comm     comm;
1932   IS           isrow_new, iscol_new;
1933   Mat_SeqBAIJ *aij;
1934 
1935   PetscFunctionBegin;
1936   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
1937   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1938   PetscCallMPI(MPI_Comm_size(comm, &size));
1939   /* The compression and expansion should be avoided. Doesn't point
1940      out errors, might change the indices, hence buggey */
1941   PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
1942   if (isrow == iscol) {
1943     iscol_new = isrow_new;
1944     PetscCall(PetscObjectReference((PetscObject)iscol_new));
1945   } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));
1946 
1947   if (call == MAT_REUSE_MATRIX) {
1948     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
1949     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1950     PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse));
1951   } else {
1952     PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse));
1953   }
1954   PetscCall(ISDestroy(&isrow_new));
1955   PetscCall(ISDestroy(&iscol_new));
1956   /*
1957       m - number of local rows
1958       n - number of columns (same on all processors)
1959       rstart - first row in new global matrix generated
1960   */
1961   PetscCall(MatGetBlockSize(mat, &bs));
1962   PetscCall(MatGetSize(Mreuse, &m, &n));
1963   m = m / bs;
1964   n = n / bs;
1965 
1966   if (call == MAT_INITIAL_MATRIX) {
1967     aij = (Mat_SeqBAIJ *)(Mreuse)->data;
1968     ii  = aij->i;
1969     jj  = aij->j;
1970 
1971     /*
1972         Determine the number of non-zeros in the diagonal and off-diagonal
1973         portions of the matrix in order to do correct preallocation
1974     */
1975 
1976     /* first get start and end of "diagonal" columns */
1977     if (csize == PETSC_DECIDE) {
1978       PetscCall(ISGetSize(isrow, &mglobal));
1979       if (mglobal == n * bs) { /* square matrix */
1980         nlocal = m;
1981       } else {
1982         nlocal = n / size + ((n % size) > rank);
1983       }
1984     } else {
1985       nlocal = csize / bs;
1986     }
1987     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
1988     rstart = rend - nlocal;
1989     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
1990 
1991     /* next, compute all the lengths */
1992     PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
1993     for (i = 0; i < m; i++) {
1994       jend = ii[i + 1] - ii[i];
1995       olen = 0;
1996       dlen = 0;
1997       for (j = 0; j < jend; j++) {
1998         if (*jj < rstart || *jj >= rend) olen++;
1999         else dlen++;
2000         jj++;
2001       }
2002       olens[i] = olen;
2003       dlens[i] = dlen;
2004     }
2005     PetscCall(MatCreate(comm, &M));
2006     PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
2007     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
2008     PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
2009     PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
2010     PetscCall(PetscFree2(dlens, olens));
2011   } else {
2012     PetscInt ml, nl;
2013 
2014     M = *newmat;
2015     PetscCall(MatGetLocalSize(M, &ml, &nl));
2016     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
2017     PetscCall(MatZeroEntries(M));
2018     /*
2019          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2020        rather than the slower MatSetValues().
2021     */
2022     M->was_assembled = PETSC_TRUE;
2023     M->assembled     = PETSC_FALSE;
2024   }
2025   PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
2026   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
2027   aij = (Mat_SeqBAIJ *)(Mreuse)->data;
2028   ii  = aij->i;
2029   jj  = aij->j;
2030   aa  = aij->a;
2031   for (i = 0; i < m; i++) {
2032     row   = rstart / bs + i;
2033     nz    = ii[i + 1] - ii[i];
2034     cwork = jj;
2035     jj += nz;
2036     vwork = aa;
2037     aa += nz * bs * bs;
2038     PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
2039   }
2040 
2041   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
2042   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
2043   *newmat = M;
2044 
2045   /* save submatrix used in processor for next request */
2046   if (call == MAT_INITIAL_MATRIX) {
2047     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
2048     PetscCall(PetscObjectDereference((PetscObject)Mreuse));
2049   }
2050   PetscFunctionReturn(PETSC_SUCCESS);
2051 }
2052 
2053 PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B)
2054 {
2055   MPI_Comm        comm, pcomm;
2056   PetscInt        clocal_size, nrows;
2057   const PetscInt *rows;
2058   PetscMPIInt     size;
2059   IS              crowp, lcolp;
2060 
2061   PetscFunctionBegin;
2062   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2063   /* make a collective version of 'rowp' */
2064   PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
2065   if (pcomm == comm) {
2066     crowp = rowp;
2067   } else {
2068     PetscCall(ISGetSize(rowp, &nrows));
2069     PetscCall(ISGetIndices(rowp, &rows));
2070     PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
2071     PetscCall(ISRestoreIndices(rowp, &rows));
2072   }
2073   PetscCall(ISSetPermutation(crowp));
2074   /* make a local version of 'colp' */
2075   PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
2076   PetscCallMPI(MPI_Comm_size(pcomm, &size));
2077   if (size == 1) {
2078     lcolp = colp;
2079   } else {
2080     PetscCall(ISAllGather(colp, &lcolp));
2081   }
2082   PetscCall(ISSetPermutation(lcolp));
2083   /* now we just get the submatrix */
2084   PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
2085   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B));
2086   /* clean up */
2087   if (pcomm != comm) PetscCall(ISDestroy(&crowp));
2088   if (size > 1) PetscCall(ISDestroy(&lcolp));
2089   PetscFunctionReturn(PETSC_SUCCESS);
2090 }
2091 
2092 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
2093 {
2094   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
2095   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
2096 
2097   PetscFunctionBegin;
2098   if (nghosts) *nghosts = B->nbs;
2099   if (ghosts) *ghosts = baij->garray;
2100   PetscFunctionReturn(PETSC_SUCCESS);
2101 }
2102 
2103 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat)
2104 {
2105   Mat          B;
2106   Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
2107   Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
2108   Mat_SeqAIJ  *b;
2109   PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
2110   PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
2111   PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;
2112 
2113   PetscFunctionBegin;
2114   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2115   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2116 
2117   /*   Tell every processor the number of nonzeros per row  */
2118   PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
2119   for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
2120   PetscCall(PetscMalloc1(2 * size, &recvcounts));
2121   displs = recvcounts + size;
2122   for (i = 0; i < size; i++) {
2123     recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs;
2124     displs[i]     = A->rmap->range[i] / bs;
2125   }
2126   PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
2127   /* Create the sequential matrix of the same type as the local block diagonal  */
2128   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
2129   PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
2130   PetscCall(MatSetType(B, MATSEQAIJ));
2131   PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
2132   b = (Mat_SeqAIJ *)B->data;
2133 
2134   /*     Copy my part of matrix column indices over  */
2135   sendcount  = ad->nz + bd->nz;
2136   jsendbuf   = b->j + b->i[rstarts[rank] / bs];
2137   a_jsendbuf = ad->j;
2138   b_jsendbuf = bd->j;
2139   n          = A->rmap->rend / bs - A->rmap->rstart / bs;
2140   cnt        = 0;
2141   for (i = 0; i < n; i++) {
2142     /* put in lower diagonal portion */
2143     m = bd->i[i + 1] - bd->i[i];
2144     while (m > 0) {
2145       /* is it above diagonal (in bd (compressed) numbering) */
2146       if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
2147       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2148       m--;
2149     }
2150 
2151     /* put in diagonal portion */
2152     for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;
2153 
2154     /* put in upper diagonal portion */
2155     while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
2156   }
2157   PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);
2158 
2159   /*  Gather all column indices to all processors  */
2160   for (i = 0; i < size; i++) {
2161     recvcounts[i] = 0;
2162     for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
2163   }
2164   displs[0] = 0;
2165   for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
2166   PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
2167   /*  Assemble the matrix into useable form (note numerical values not yet set)  */
2168   /* set the b->ilen (length of each row) values */
2169   PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
2170   /* set the b->i indices */
2171   b->i[0] = 0;
2172   for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
2173   PetscCall(PetscFree(lens));
2174   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2175   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2176   PetscCall(PetscFree(recvcounts));
2177 
2178   PetscCall(MatPropagateSymmetryOptions(A, B));
2179   *newmat = B;
2180   PetscFunctionReturn(PETSC_SUCCESS);
2181 }
2182 
2183 PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
2184 {
2185   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
2186   Vec          bb1 = NULL;
2187 
2188   PetscFunctionBegin;
2189   if (flag == SOR_APPLY_UPPER) {
2190     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2191     PetscFunctionReturn(PETSC_SUCCESS);
2192   }
2193 
2194   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));
2195 
2196   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2197     if (flag & SOR_ZERO_INITIAL_GUESS) {
2198       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2199       its--;
2200     }
2201 
2202     while (its--) {
2203       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2204       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2205 
2206       /* update rhs: bb1 = bb - B*x */
2207       PetscCall(VecScale(mat->lvec, -1.0));
2208       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2209 
2210       /* local sweep */
2211       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
2212     }
2213   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2214     if (flag & SOR_ZERO_INITIAL_GUESS) {
2215       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2216       its--;
2217     }
2218     while (its--) {
2219       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2220       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2221 
2222       /* update rhs: bb1 = bb - B*x */
2223       PetscCall(VecScale(mat->lvec, -1.0));
2224       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2225 
2226       /* local sweep */
2227       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
2228     }
2229   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2230     if (flag & SOR_ZERO_INITIAL_GUESS) {
2231       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2232       its--;
2233     }
2234     while (its--) {
2235       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2236       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2237 
2238       /* update rhs: bb1 = bb - B*x */
2239       PetscCall(VecScale(mat->lvec, -1.0));
2240       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2241 
2242       /* local sweep */
2243       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
2244     }
2245   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported");
2246 
2247   PetscCall(VecDestroy(&bb1));
2248   PetscFunctionReturn(PETSC_SUCCESS);
2249 }
2250 
2251 PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions)
2252 {
2253   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data;
2254   PetscInt     m, N, i, *garray = aij->garray;
2255   PetscInt     ib, jb, bs = A->rmap->bs;
2256   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data;
2257   MatScalar   *a_val = a_aij->a;
2258   Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data;
2259   MatScalar   *b_val = b_aij->a;
2260   PetscReal   *work;
2261 
2262   PetscFunctionBegin;
2263   PetscCall(MatGetSize(A, &m, &N));
2264   PetscCall(PetscCalloc1(N, &work));
2265   if (type == NORM_2) {
2266     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2267       for (jb = 0; jb < bs; jb++) {
2268         for (ib = 0; ib < bs; ib++) {
2269           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2270           a_val++;
2271         }
2272       }
2273     }
2274     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2275       for (jb = 0; jb < bs; jb++) {
2276         for (ib = 0; ib < bs; ib++) {
2277           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2278           b_val++;
2279         }
2280       }
2281     }
2282   } else if (type == NORM_1) {
2283     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2284       for (jb = 0; jb < bs; jb++) {
2285         for (ib = 0; ib < bs; ib++) {
2286           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2287           a_val++;
2288         }
2289       }
2290     }
2291     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2292       for (jb = 0; jb < bs; jb++) {
2293         for (ib = 0; ib < bs; ib++) {
2294           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2295           b_val++;
2296         }
2297       }
2298     }
2299   } else if (type == NORM_INFINITY) {
2300     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2301       for (jb = 0; jb < bs; jb++) {
2302         for (ib = 0; ib < bs; ib++) {
2303           int col   = A->cmap->rstart + a_aij->j[i] * bs + jb;
2304           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2305           a_val++;
2306         }
2307       }
2308     }
2309     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2310       for (jb = 0; jb < bs; jb++) {
2311         for (ib = 0; ib < bs; ib++) {
2312           int col   = garray[b_aij->j[i]] * bs + jb;
2313           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2314           b_val++;
2315         }
2316       }
2317     }
2318   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
2319     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2320       for (jb = 0; jb < bs; jb++) {
2321         for (ib = 0; ib < bs; ib++) {
2322           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
2323           a_val++;
2324         }
2325       }
2326     }
2327     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2328       for (jb = 0; jb < bs; jb++) {
2329         for (ib = 0; ib < bs; ib++) {
2330           work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
2331           b_val++;
2332         }
2333       }
2334     }
2335   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2336     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2337       for (jb = 0; jb < bs; jb++) {
2338         for (ib = 0; ib < bs; ib++) {
2339           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
2340           a_val++;
2341         }
2342       }
2343     }
2344     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2345       for (jb = 0; jb < bs; jb++) {
2346         for (ib = 0; ib < bs; ib++) {
2347           work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
2348           b_val++;
2349         }
2350       }
2351     }
2352   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
2353   if (type == NORM_INFINITY) {
2354     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
2355   } else {
2356     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
2357   }
2358   PetscCall(PetscFree(work));
2359   if (type == NORM_2) {
2360     for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2361   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2362     for (i = 0; i < N; i++) reductions[i] /= m;
2363   }
2364   PetscFunctionReturn(PETSC_SUCCESS);
2365 }
2366 
2367 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values)
2368 {
2369   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2370 
2371   PetscFunctionBegin;
2372   PetscCall(MatInvertBlockDiagonal(a->A, values));
2373   A->factorerrortype             = a->A->factorerrortype;
2374   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2375   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2376   PetscFunctionReturn(PETSC_SUCCESS);
2377 }
2378 
2379 PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a)
2380 {
2381   Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
2382   Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ *)maij->A->data;
2383 
2384   PetscFunctionBegin;
2385   if (!Y->preallocated) {
2386     PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
2387   } else if (!aij->nz) {
2388     PetscInt nonew = aij->nonew;
2389     PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
2390     aij->nonew = nonew;
2391   }
2392   PetscCall(MatShift_Basic(Y, a));
2393   PetscFunctionReturn(PETSC_SUCCESS);
2394 }
2395 
2396 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d)
2397 {
2398   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2399 
2400   PetscFunctionBegin;
2401   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2402   PetscCall(MatMissingDiagonal(a->A, missing, d));
2403   if (d) {
2404     PetscInt rstart;
2405     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2406     *d += rstart / A->rmap->bs;
2407   }
2408   PetscFunctionReturn(PETSC_SUCCESS);
2409 }
2410 
2411 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a)
2412 {
2413   PetscFunctionBegin;
2414   *a = ((Mat_MPIBAIJ *)A->data)->A;
2415   PetscFunctionReturn(PETSC_SUCCESS);
2416 }
2417 
2418 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep)
2419 {
2420   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2421 
2422   PetscFunctionBegin;
2423   PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2424   PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2425   PetscFunctionReturn(PETSC_SUCCESS);
2426 }
2427 
2428 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2429                                        MatGetRow_MPIBAIJ,
2430                                        MatRestoreRow_MPIBAIJ,
2431                                        MatMult_MPIBAIJ,
2432                                        /* 4*/ MatMultAdd_MPIBAIJ,
2433                                        MatMultTranspose_MPIBAIJ,
2434                                        MatMultTransposeAdd_MPIBAIJ,
2435                                        NULL,
2436                                        NULL,
2437                                        NULL,
2438                                        /*10*/ NULL,
2439                                        NULL,
2440                                        NULL,
2441                                        MatSOR_MPIBAIJ,
2442                                        MatTranspose_MPIBAIJ,
2443                                        /*15*/ MatGetInfo_MPIBAIJ,
2444                                        MatEqual_MPIBAIJ,
2445                                        MatGetDiagonal_MPIBAIJ,
2446                                        MatDiagonalScale_MPIBAIJ,
2447                                        MatNorm_MPIBAIJ,
2448                                        /*20*/ MatAssemblyBegin_MPIBAIJ,
2449                                        MatAssemblyEnd_MPIBAIJ,
2450                                        MatSetOption_MPIBAIJ,
2451                                        MatZeroEntries_MPIBAIJ,
2452                                        /*24*/ MatZeroRows_MPIBAIJ,
2453                                        NULL,
2454                                        NULL,
2455                                        NULL,
2456                                        NULL,
2457                                        /*29*/ MatSetUp_MPI_Hash,
2458                                        NULL,
2459                                        NULL,
2460                                        MatGetDiagonalBlock_MPIBAIJ,
2461                                        NULL,
2462                                        /*34*/ MatDuplicate_MPIBAIJ,
2463                                        NULL,
2464                                        NULL,
2465                                        NULL,
2466                                        NULL,
2467                                        /*39*/ MatAXPY_MPIBAIJ,
2468                                        MatCreateSubMatrices_MPIBAIJ,
2469                                        MatIncreaseOverlap_MPIBAIJ,
2470                                        MatGetValues_MPIBAIJ,
2471                                        MatCopy_MPIBAIJ,
2472                                        /*44*/ NULL,
2473                                        MatScale_MPIBAIJ,
2474                                        MatShift_MPIBAIJ,
2475                                        NULL,
2476                                        MatZeroRowsColumns_MPIBAIJ,
2477                                        /*49*/ NULL,
2478                                        NULL,
2479                                        NULL,
2480                                        NULL,
2481                                        NULL,
2482                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2483                                        NULL,
2484                                        MatSetUnfactored_MPIBAIJ,
2485                                        MatPermute_MPIBAIJ,
2486                                        MatSetValuesBlocked_MPIBAIJ,
2487                                        /*59*/ MatCreateSubMatrix_MPIBAIJ,
2488                                        MatDestroy_MPIBAIJ,
2489                                        MatView_MPIBAIJ,
2490                                        NULL,
2491                                        NULL,
2492                                        /*64*/ NULL,
2493                                        NULL,
2494                                        NULL,
2495                                        NULL,
2496                                        NULL,
2497                                        /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2498                                        NULL,
2499                                        NULL,
2500                                        NULL,
2501                                        NULL,
2502                                        /*74*/ NULL,
2503                                        MatFDColoringApply_BAIJ,
2504                                        NULL,
2505                                        NULL,
2506                                        NULL,
2507                                        /*79*/ NULL,
2508                                        NULL,
2509                                        NULL,
2510                                        NULL,
2511                                        MatLoad_MPIBAIJ,
2512                                        /*84*/ NULL,
2513                                        NULL,
2514                                        NULL,
2515                                        NULL,
2516                                        NULL,
2517                                        /*89*/ NULL,
2518                                        NULL,
2519                                        NULL,
2520                                        NULL,
2521                                        NULL,
2522                                        /*94*/ NULL,
2523                                        NULL,
2524                                        NULL,
2525                                        NULL,
2526                                        NULL,
2527                                        /*99*/ NULL,
2528                                        NULL,
2529                                        NULL,
2530                                        MatConjugate_MPIBAIJ,
2531                                        NULL,
2532                                        /*104*/ NULL,
2533                                        MatRealPart_MPIBAIJ,
2534                                        MatImaginaryPart_MPIBAIJ,
2535                                        NULL,
2536                                        NULL,
2537                                        /*109*/ NULL,
2538                                        NULL,
2539                                        NULL,
2540                                        NULL,
2541                                        MatMissingDiagonal_MPIBAIJ,
2542                                        /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ,
2543                                        NULL,
2544                                        MatGetGhosts_MPIBAIJ,
2545                                        NULL,
2546                                        NULL,
2547                                        /*119*/ NULL,
2548                                        NULL,
2549                                        NULL,
2550                                        NULL,
2551                                        MatGetMultiProcBlock_MPIBAIJ,
2552                                        /*124*/ NULL,
2553                                        MatGetColumnReductions_MPIBAIJ,
2554                                        MatInvertBlockDiagonal_MPIBAIJ,
2555                                        NULL,
2556                                        NULL,
2557                                        /*129*/ NULL,
2558                                        NULL,
2559                                        NULL,
2560                                        NULL,
2561                                        NULL,
2562                                        /*134*/ NULL,
2563                                        NULL,
2564                                        NULL,
2565                                        NULL,
2566                                        NULL,
2567                                        /*139*/ MatSetBlockSizes_Default,
2568                                        NULL,
2569                                        NULL,
2570                                        MatFDColoringSetUp_MPIXAIJ,
2571                                        NULL,
2572                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
2573                                        NULL,
2574                                        NULL,
2575                                        NULL,
2576                                        NULL,
2577                                        NULL,
2578                                        /*150*/ NULL,
2579                                        MatEliminateZeros_MPIBAIJ};
2580 
2581 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
2582 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
2583 
2584 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
2585 {
2586   PetscInt        m, rstart, cstart, cend;
2587   PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
2588   const PetscInt *JJ          = NULL;
2589   PetscScalar    *values      = NULL;
2590   PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
2591   PetscBool       nooffprocentries;
2592 
2593   PetscFunctionBegin;
2594   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
2595   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
2596   PetscCall(PetscLayoutSetUp(B->rmap));
2597   PetscCall(PetscLayoutSetUp(B->cmap));
2598   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2599   m      = B->rmap->n / bs;
2600   rstart = B->rmap->rstart / bs;
2601   cstart = B->cmap->rstart / bs;
2602   cend   = B->cmap->rend / bs;
2603 
2604   PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
2605   PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
2606   for (i = 0; i < m; i++) {
2607     nz = ii[i + 1] - ii[i];
2608     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
2609     nz_max = PetscMax(nz_max, nz);
2610     dlen   = 0;
2611     olen   = 0;
2612     JJ     = jj + ii[i];
2613     for (j = 0; j < nz; j++) {
2614       if (*JJ < cstart || *JJ >= cend) olen++;
2615       else dlen++;
2616       JJ++;
2617     }
2618     d_nnz[i] = dlen;
2619     o_nnz[i] = olen;
2620   }
2621   PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
2622   PetscCall(PetscFree2(d_nnz, o_nnz));
2623 
2624   values = (PetscScalar *)V;
2625   if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
2626   for (i = 0; i < m; i++) {
2627     PetscInt        row   = i + rstart;
2628     PetscInt        ncols = ii[i + 1] - ii[i];
2629     const PetscInt *icols = jj + ii[i];
2630     if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2631       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
2632       PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
2633     } else { /* block ordering does not match so we can only insert one block at a time. */
2634       PetscInt j;
2635       for (j = 0; j < ncols; j++) {
2636         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
2637         PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
2638       }
2639     }
2640   }
2641 
2642   if (!V) PetscCall(PetscFree(values));
2643   nooffprocentries    = B->nooffprocentries;
2644   B->nooffprocentries = PETSC_TRUE;
2645   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2646   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2647   B->nooffprocentries = nooffprocentries;
2648 
2649   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2650   PetscFunctionReturn(PETSC_SUCCESS);
2651 }
2652 
2653 /*@C
2654   MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values
2655 
2656   Collective
2657 
2658   Input Parameters:
2659 + B  - the matrix
2660 . bs - the block size
2661 . i  - the indices into `j` for the start of each local row (starts with zero)
2662 . j  - the column indices for each local row (starts with zero) these must be sorted for each row
2663 - v  - optional values in the matrix
2664 
2665   Level: advanced
2666 
2667   Notes:
2668   The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
2669   may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
2670   over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2671   `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2672   block column and the second index is over columns within a block.
2673 
2674   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2675 
2676 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MPIBAIJ`
2677 @*/
2678 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2679 {
2680   PetscFunctionBegin;
2681   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2682   PetscValidType(B, 1);
2683   PetscValidLogicalCollectiveInt(B, bs, 2);
2684   PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
2685   PetscFunctionReturn(PETSC_SUCCESS);
2686 }
2687 
2688 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
2689 {
2690   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
2691   PetscInt     i;
2692   PetscMPIInt  size;
2693 
2694   PetscFunctionBegin;
2695   if (B->hash_active) {
2696     B->ops[0]      = b->cops;
2697     B->hash_active = PETSC_FALSE;
2698   }
2699   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
2700   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
2701   PetscCall(PetscLayoutSetUp(B->rmap));
2702   PetscCall(PetscLayoutSetUp(B->cmap));
2703   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2704 
2705   if (d_nnz) {
2706     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
2707   }
2708   if (o_nnz) {
2709     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
2710   }
2711 
2712   b->bs2 = bs * bs;
2713   b->mbs = B->rmap->n / bs;
2714   b->nbs = B->cmap->n / bs;
2715   b->Mbs = B->rmap->N / bs;
2716   b->Nbs = B->cmap->N / bs;
2717 
2718   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
2719   b->rstartbs = B->rmap->rstart / bs;
2720   b->rendbs   = B->rmap->rend / bs;
2721   b->cstartbs = B->cmap->rstart / bs;
2722   b->cendbs   = B->cmap->rend / bs;
2723 
2724 #if defined(PETSC_USE_CTABLE)
2725   PetscCall(PetscHMapIDestroy(&b->colmap));
2726 #else
2727   PetscCall(PetscFree(b->colmap));
2728 #endif
2729   PetscCall(PetscFree(b->garray));
2730   PetscCall(VecDestroy(&b->lvec));
2731   PetscCall(VecScatterDestroy(&b->Mvctx));
2732 
2733   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2734   PetscCall(MatDestroy(&b->B));
2735   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2736   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2737   PetscCall(MatSetType(b->B, MATSEQBAIJ));
2738 
2739   PetscCall(MatDestroy(&b->A));
2740   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2741   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2742   PetscCall(MatSetType(b->A, MATSEQBAIJ));
2743 
2744   PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
2745   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
2746   B->preallocated  = PETSC_TRUE;
2747   B->was_assembled = PETSC_FALSE;
2748   B->assembled     = PETSC_FALSE;
2749   PetscFunctionReturn(PETSC_SUCCESS);
2750 }
2751 
2752 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec);
2753 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal);
2754 
2755 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj)
2756 {
2757   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
2758   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data;
2759   PetscInt        M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs;
2760   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2761 
2762   PetscFunctionBegin;
2763   PetscCall(PetscMalloc1(M + 1, &ii));
2764   ii[0] = 0;
2765   for (i = 0; i < M; i++) {
2766     PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]);
2767     PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]);
2768     ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i];
2769     /* remove one from count of matrix has diagonal */
2770     for (j = id[i]; j < id[i + 1]; j++) {
2771       if (jd[j] == i) {
2772         ii[i + 1]--;
2773         break;
2774       }
2775     }
2776   }
2777   PetscCall(PetscMalloc1(ii[M], &jj));
2778   cnt = 0;
2779   for (i = 0; i < M; i++) {
2780     for (j = io[i]; j < io[i + 1]; j++) {
2781       if (garray[jo[j]] > rstart) break;
2782       jj[cnt++] = garray[jo[j]];
2783     }
2784     for (k = id[i]; k < id[i + 1]; k++) {
2785       if (jd[k] != i) jj[cnt++] = rstart + jd[k];
2786     }
2787     for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]];
2788   }
2789   PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj));
2790   PetscFunctionReturn(PETSC_SUCCESS);
2791 }
2792 
2793 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2794 
2795 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
2796 
2797 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
2798 {
2799   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2800   Mat_MPIAIJ  *b;
2801   Mat          B;
2802 
2803   PetscFunctionBegin;
2804   PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");
2805 
2806   if (reuse == MAT_REUSE_MATRIX) {
2807     B = *newmat;
2808   } else {
2809     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
2810     PetscCall(MatSetType(B, MATMPIAIJ));
2811     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
2812     PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
2813     PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
2814     PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
2815   }
2816   b = (Mat_MPIAIJ *)B->data;
2817 
2818   if (reuse == MAT_REUSE_MATRIX) {
2819     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
2820     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
2821   } else {
2822     PetscInt   *garray = a->garray;
2823     Mat_SeqAIJ *bB;
2824     PetscInt    bs, nnz;
2825     PetscCall(MatDestroy(&b->A));
2826     PetscCall(MatDestroy(&b->B));
2827     /* just clear out the data structure */
2828     PetscCall(MatDisAssemble_MPIAIJ(B));
2829     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
2830     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));
2831 
2832     /* Global numbering for b->B columns */
2833     bB  = (Mat_SeqAIJ *)b->B->data;
2834     bs  = A->rmap->bs;
2835     nnz = bB->i[A->rmap->n];
2836     for (PetscInt k = 0; k < nnz; k++) {
2837       PetscInt bj = bB->j[k] / bs;
2838       PetscInt br = bB->j[k] % bs;
2839       bB->j[k]    = garray[bj] * bs + br;
2840     }
2841   }
2842   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2843   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2844 
2845   if (reuse == MAT_INPLACE_MATRIX) {
2846     PetscCall(MatHeaderReplace(A, &B));
2847   } else {
2848     *newmat = B;
2849   }
2850   PetscFunctionReturn(PETSC_SUCCESS);
2851 }
2852 
2853 /*MC
2854    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2855 
2856    Options Database Keys:
2857 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
2858 . -mat_block_size <bs> - set the blocksize used to store the matrix
2859 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use  (0 often indicates using BLAS)
2860 - -mat_use_hash_table <fact> - set hash table factor
2861 
2862    Level: beginner
2863 
2864    Note:
2865     `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. In this no
2866     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
2867 
2868 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ`
2869 M*/
2870 
2871 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);
2872 
2873 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2874 {
2875   Mat_MPIBAIJ *b;
2876   PetscBool    flg = PETSC_FALSE;
2877 
2878   PetscFunctionBegin;
2879   PetscCall(PetscNew(&b));
2880   B->data      = (void *)b;
2881   B->ops[0]    = MatOps_Values;
2882   B->assembled = PETSC_FALSE;
2883 
2884   B->insertmode = NOT_SET_VALUES;
2885   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
2886   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2887 
2888   /* build local table of row and column ownerships */
2889   PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));
2890 
2891   /* build cache for off array entries formed */
2892   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
2893 
2894   b->donotstash  = PETSC_FALSE;
2895   b->colmap      = NULL;
2896   b->garray      = NULL;
2897   b->roworiented = PETSC_TRUE;
2898 
2899   /* stuff used in block assembly */
2900   b->barray = NULL;
2901 
2902   /* stuff used for matrix vector multiply */
2903   b->lvec  = NULL;
2904   b->Mvctx = NULL;
2905 
2906   /* stuff for MatGetRow() */
2907   b->rowindices   = NULL;
2908   b->rowvalues    = NULL;
2909   b->getrowactive = PETSC_FALSE;
2910 
2911   /* hash table stuff */
2912   b->ht           = NULL;
2913   b->hd           = NULL;
2914   b->ht_size      = 0;
2915   b->ht_flag      = PETSC_FALSE;
2916   b->ht_fact      = 0;
2917   b->ht_total_ct  = 0;
2918   b->ht_insert_ct = 0;
2919 
2920   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2921   b->ijonly = PETSC_FALSE;
2922 
2923   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
2924   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
2925   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
2926 #if defined(PETSC_HAVE_HYPRE)
2927   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
2928 #endif
2929   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
2930   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
2931   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
2932   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
2933   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
2934   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
2935   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
2936   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));
2937 
2938   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
2939   PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
2940   if (flg) {
2941     PetscReal fact = 1.39;
2942     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
2943     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
2944     if (fact <= 1.0) fact = 1.39;
2945     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
2946     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
2947   }
2948   PetscOptionsEnd();
2949   PetscFunctionReturn(PETSC_SUCCESS);
2950 }
2951 
2952 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2953 /*MC
2954    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2955 
2956    This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
2957    and `MATMPIBAIJ` otherwise.
2958 
2959    Options Database Keys:
2960 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`
2961 
2962   Level: beginner
2963 
2964 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
2965 M*/
2966 
2967 /*@C
2968   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
2969   (block compressed row).
2970 
2971   Collective
2972 
2973   Input Parameters:
2974 + B     - the matrix
2975 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2976           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2977 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2978            submatrix  (same for all local rows)
2979 . d_nnz - array containing the number of block nonzeros in the various block rows
2980            of the in diagonal portion of the local (possibly different for each block
2981            row) or `NULL`.  If you plan to factor the matrix you must leave room for the diagonal entry and
2982            set it even if it is zero.
2983 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2984            submatrix (same for all local rows).
2985 - o_nnz - array containing the number of nonzeros in the various block rows of the
2986            off-diagonal portion of the local submatrix (possibly different for
2987            each block row) or `NULL`.
2988 
2989    If the *_nnz parameter is given then the *_nz parameter is ignored
2990 
2991   Options Database Keys:
2992 + -mat_block_size            - size of the blocks to use
2993 - -mat_use_hash_table <fact> - set hash table factor
2994 
2995   Level: intermediate
2996 
2997   Notes:
2998   For good matrix assembly performance
2999   the user should preallocate the matrix storage by setting the parameters
3000   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3001   performance can be increased by more than a factor of 50.
3002 
3003   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
3004   than it must be used on all processors that share the object for that argument.
3005 
3006   Storage Information:
3007   For a square global matrix we define each processor's diagonal portion
3008   to be its local rows and the corresponding columns (a square submatrix);
3009   each processor's off-diagonal portion encompasses the remainder of the
3010   local matrix (a rectangular submatrix).
3011 
3012   The user can specify preallocated storage for the diagonal part of
3013   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
3014   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3015   memory allocation.  Likewise, specify preallocated storage for the
3016   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3017 
3018   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3019   the figure below we depict these three local rows and all columns (0-11).
3020 
3021 .vb
3022            0 1 2 3 4 5 6 7 8 9 10 11
3023           --------------------------
3024    row 3  |o o o d d d o o o o  o  o
3025    row 4  |o o o d d d o o o o  o  o
3026    row 5  |o o o d d d o o o o  o  o
3027           --------------------------
3028 .ve
3029 
3030   Thus, any entries in the d locations are stored in the d (diagonal)
3031   submatrix, and any entries in the o locations are stored in the
3032   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3033   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3034 
3035   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3036   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3037   In general, for PDE problems in which most nonzeros are near the diagonal,
3038   one expects `d_nz` >> `o_nz`.
3039 
3040   You can call `MatGetInfo()` to get information on how effective the preallocation was;
3041   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3042   You can also run with the option `-info` and look for messages with the string
3043   malloc in them to see if additional memory allocation was needed.
3044 
3045 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
3046 @*/
3047 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
3048 {
3049   PetscFunctionBegin;
3050   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3051   PetscValidType(B, 1);
3052   PetscValidLogicalCollectiveInt(B, bs, 2);
3053   PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
3054   PetscFunctionReturn(PETSC_SUCCESS);
3055 }
3056 
3057 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
3058 /*@C
3059   MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
3060   (block compressed row).
3061 
3062   Collective
3063 
3064   Input Parameters:
3065 + comm  - MPI communicator
3066 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3067           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3068 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
3069            This value should be the same as the local size used in creating the
3070            y vector for the matrix-vector product y = Ax.
3071 . n     - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
3072            This value should be the same as the local size used in creating the
3073            x vector for the matrix-vector product y = Ax.
3074 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3075 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3076 . d_nz  - number of nonzero blocks per block row in diagonal portion of local
3077            submatrix  (same for all local rows)
3078 . d_nnz - array containing the number of nonzero blocks in the various block rows
3079            of the in diagonal portion of the local (possibly different for each block
3080            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3081            and set it even if it is zero.
3082 . o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3083            submatrix (same for all local rows).
3084 - o_nnz - array containing the number of nonzero blocks in the various block rows of the
3085            off-diagonal portion of the local submatrix (possibly different for
3086            each block row) or NULL.
3087 
3088   Output Parameter:
3089 . A - the matrix
3090 
3091   Options Database Keys:
3092 + -mat_block_size            - size of the blocks to use
3093 - -mat_use_hash_table <fact> - set hash table factor
3094 
3095   Level: intermediate
3096 
3097   Notes:
3098   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3099   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3100   [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
3101 
3102   For good matrix assembly performance
3103   the user should preallocate the matrix storage by setting the parameters
3104   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3105   performance can be increased by more than a factor of 50.
3106 
3107   If the *_nnz parameter is given then the *_nz parameter is ignored
3108 
3109   A nonzero block is any block that as 1 or more nonzeros in it
3110 
3111   The user MUST specify either the local or global matrix dimensions
3112   (possibly both).
3113 
3114   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
3115   than it must be used on all processors that share the object for that argument.
3116 
3117   Storage Information:
3118   For a square global matrix we define each processor's diagonal portion
3119   to be its local rows and the corresponding columns (a square submatrix);
3120   each processor's off-diagonal portion encompasses the remainder of the
3121   local matrix (a rectangular submatrix).
3122 
3123   The user can specify preallocated storage for the diagonal part of
3124   the local submatrix with either d_nz or d_nnz (not both).  Set
3125   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3126   memory allocation.  Likewise, specify preallocated storage for the
3127   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3128 
3129   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3130   the figure below we depict these three local rows and all columns (0-11).
3131 
3132 .vb
3133            0 1 2 3 4 5 6 7 8 9 10 11
3134           --------------------------
3135    row 3  |o o o d d d o o o o  o  o
3136    row 4  |o o o d d d o o o o  o  o
3137    row 5  |o o o d d d o o o o  o  o
3138           --------------------------
3139 .ve
3140 
3141   Thus, any entries in the d locations are stored in the d (diagonal)
3142   submatrix, and any entries in the o locations are stored in the
3143   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3144   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3145 
3146   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3147   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3148   In general, for PDE problems in which most nonzeros are near the diagonal,
3149   one expects `d_nz` >> `o_nz`.
3150 
3151 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
3152 @*/
3153 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
3154 {
3155   PetscMPIInt size;
3156 
3157   PetscFunctionBegin;
3158   PetscCall(MatCreate(comm, A));
3159   PetscCall(MatSetSizes(*A, m, n, M, N));
3160   PetscCallMPI(MPI_Comm_size(comm, &size));
3161   if (size > 1) {
3162     PetscCall(MatSetType(*A, MATMPIBAIJ));
3163     PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
3164   } else {
3165     PetscCall(MatSetType(*A, MATSEQBAIJ));
3166     PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
3167   }
3168   PetscFunctionReturn(PETSC_SUCCESS);
3169 }
3170 
3171 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
3172 {
3173   Mat          mat;
3174   Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
3175   PetscInt     len = 0;
3176 
3177   PetscFunctionBegin;
3178   *newmat = NULL;
3179   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
3180   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
3181   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
3182 
3183   mat->factortype   = matin->factortype;
3184   mat->preallocated = PETSC_TRUE;
3185   mat->assembled    = PETSC_TRUE;
3186   mat->insertmode   = NOT_SET_VALUES;
3187 
3188   a             = (Mat_MPIBAIJ *)mat->data;
3189   mat->rmap->bs = matin->rmap->bs;
3190   a->bs2        = oldmat->bs2;
3191   a->mbs        = oldmat->mbs;
3192   a->nbs        = oldmat->nbs;
3193   a->Mbs        = oldmat->Mbs;
3194   a->Nbs        = oldmat->Nbs;
3195 
3196   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
3197   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
3198 
3199   a->size         = oldmat->size;
3200   a->rank         = oldmat->rank;
3201   a->donotstash   = oldmat->donotstash;
3202   a->roworiented  = oldmat->roworiented;
3203   a->rowindices   = NULL;
3204   a->rowvalues    = NULL;
3205   a->getrowactive = PETSC_FALSE;
3206   a->barray       = NULL;
3207   a->rstartbs     = oldmat->rstartbs;
3208   a->rendbs       = oldmat->rendbs;
3209   a->cstartbs     = oldmat->cstartbs;
3210   a->cendbs       = oldmat->cendbs;
3211 
3212   /* hash table stuff */
3213   a->ht           = NULL;
3214   a->hd           = NULL;
3215   a->ht_size      = 0;
3216   a->ht_flag      = oldmat->ht_flag;
3217   a->ht_fact      = oldmat->ht_fact;
3218   a->ht_total_ct  = 0;
3219   a->ht_insert_ct = 0;
3220 
3221   PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
3222   if (oldmat->colmap) {
3223 #if defined(PETSC_USE_CTABLE)
3224     PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3225 #else
3226     PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
3227     PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
3228 #endif
3229   } else a->colmap = NULL;
3230 
3231   if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
3232     PetscCall(PetscMalloc1(len, &a->garray));
3233     PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3234   } else a->garray = NULL;
3235 
3236   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
3237   PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3238   PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
3239 
3240   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3241   PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3242   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3243   *newmat = mat;
3244   PetscFunctionReturn(PETSC_SUCCESS);
3245 }
3246 
3247 /* Used for both MPIBAIJ and MPISBAIJ matrices */
3248 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
3249 {
3250   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3251   PetscInt    *rowidxs, *colidxs, rs, cs, ce;
3252   PetscScalar *matvals;
3253 
3254   PetscFunctionBegin;
3255   PetscCall(PetscViewerSetUp(viewer));
3256 
3257   /* read in matrix header */
3258   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3259   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3260   M  = header[1];
3261   N  = header[2];
3262   nz = header[3];
3263   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3264   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3265   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");
3266 
3267   /* set block sizes from the viewer's .info file */
3268   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3269   /* set local sizes if not set already */
3270   if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
3271   if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
3272   /* set global sizes if not set already */
3273   if (mat->rmap->N < 0) mat->rmap->N = M;
3274   if (mat->cmap->N < 0) mat->cmap->N = N;
3275   PetscCall(PetscLayoutSetUp(mat->rmap));
3276   PetscCall(PetscLayoutSetUp(mat->cmap));
3277 
3278   /* check if the matrix sizes are correct */
3279   PetscCall(MatGetSize(mat, &rows, &cols));
3280   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3281   PetscCall(MatGetBlockSize(mat, &bs));
3282   PetscCall(MatGetLocalSize(mat, &m, &n));
3283   PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
3284   PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
3285   mbs = m / bs;
3286   nbs = n / bs;
3287 
3288   /* read in row lengths and build row indices */
3289   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3290   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3291   rowidxs[0] = 0;
3292   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3293   PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3294   PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3295 
3296   /* read in column indices and matrix values */
3297   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3298   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3299   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3300 
3301   {                /* preallocate matrix storage */
3302     PetscBT    bt; /* helper bit set to count diagonal nonzeros */
3303     PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
3304     PetscBool  sbaij, done;
3305     PetscInt  *d_nnz, *o_nnz;
3306 
3307     PetscCall(PetscBTCreate(nbs, &bt));
3308     PetscCall(PetscHSetICreate(&ht));
3309     PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
3310     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
3311     for (i = 0; i < mbs; i++) {
3312       PetscCall(PetscBTMemzero(nbs, bt));
3313       PetscCall(PetscHSetIClear(ht));
3314       for (k = 0; k < bs; k++) {
3315         PetscInt row = bs * i + k;
3316         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3317           PetscInt col = colidxs[j];
3318           if (!sbaij || col >= row) {
3319             if (col >= cs && col < ce) {
3320               if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
3321             } else {
3322               PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
3323               if (done) o_nnz[i]++;
3324             }
3325           }
3326         }
3327       }
3328     }
3329     PetscCall(PetscBTDestroy(&bt));
3330     PetscCall(PetscHSetIDestroy(&ht));
3331     PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
3332     PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
3333     PetscCall(PetscFree2(d_nnz, o_nnz));
3334   }
3335 
3336   /* store matrix values */
3337   for (i = 0; i < m; i++) {
3338     PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
3339     PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES));
3340   }
3341 
3342   PetscCall(PetscFree(rowidxs));
3343   PetscCall(PetscFree2(colidxs, matvals));
3344   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3345   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3346   PetscFunctionReturn(PETSC_SUCCESS);
3347 }
3348 
3349 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
3350 {
3351   PetscBool isbinary;
3352 
3353   PetscFunctionBegin;
3354   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3355   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3356   PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
3357   PetscFunctionReturn(PETSC_SUCCESS);
3358 }
3359 
3360 /*@
3361   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table
3362 
3363   Input Parameters:
3364 + mat  - the matrix
3365 - fact - factor
3366 
3367   Options Database Key:
3368 . -mat_use_hash_table <fact> - provide the factor
3369 
3370   Level: advanced
3371 
3372 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()`
3373 @*/
3374 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact)
3375 {
3376   PetscFunctionBegin;
3377   PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
3378   PetscFunctionReturn(PETSC_SUCCESS);
3379 }
3380 
3381 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact)
3382 {
3383   Mat_MPIBAIJ *baij;
3384 
3385   PetscFunctionBegin;
3386   baij          = (Mat_MPIBAIJ *)mat->data;
3387   baij->ht_fact = fact;
3388   PetscFunctionReturn(PETSC_SUCCESS);
3389 }
3390 
3391 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
3392 {
3393   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3394   PetscBool    flg;
3395 
3396   PetscFunctionBegin;
3397   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
3398   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
3399   if (Ad) *Ad = a->A;
3400   if (Ao) *Ao = a->B;
3401   if (colmap) *colmap = a->garray;
3402   PetscFunctionReturn(PETSC_SUCCESS);
3403 }
3404 
3405 /*
3406     Special version for direct calls from Fortran (to eliminate two function call overheads
3407 */
3408 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3409   #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3410 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3411   #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3412 #endif
3413 
3414 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name
3415 /*@C
3416   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`
3417 
3418   Collective
3419 
3420   Input Parameters:
3421 + matin  - the matrix
3422 . min    - number of input rows
3423 . im     - input rows
3424 . nin    - number of input columns
3425 . in     - input columns
3426 . v      - numerical values input
3427 - addvin - `INSERT_VALUES` or `ADD_VALUES`
3428 
3429   Level: advanced
3430 
3431   Developer Notes:
3432   This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.
3433 
3434 .seealso: `Mat`, `MatSetValuesBlocked()`
3435 @*/
3436 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
3437 {
3438   /* convert input arguments to C version */
3439   Mat        mat = *matin;
3440   PetscInt   m = *min, n = *nin;
3441   InsertMode addv = *addvin;
3442 
3443   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ *)mat->data;
3444   const MatScalar *value;
3445   MatScalar       *barray      = baij->barray;
3446   PetscBool        roworiented = baij->roworiented;
3447   PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
3448   PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
3449   PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
3450 
3451   PetscFunctionBegin;
3452   /* tasks normally handled by MatSetValuesBlocked() */
3453   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3454   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
3455   PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
3456   if (mat->assembled) {
3457     mat->was_assembled = PETSC_TRUE;
3458     mat->assembled     = PETSC_FALSE;
3459   }
3460   PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));
3461 
3462   if (!barray) {
3463     PetscCall(PetscMalloc1(bs2, &barray));
3464     baij->barray = barray;
3465   }
3466 
3467   if (roworiented) stepval = (n - 1) * bs;
3468   else stepval = (m - 1) * bs;
3469 
3470   for (i = 0; i < m; i++) {
3471     if (im[i] < 0) continue;
3472     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
3473     if (im[i] >= rstart && im[i] < rend) {
3474       row = im[i] - rstart;
3475       for (j = 0; j < n; j++) {
3476         /* If NumCol = 1 then a copy is not required */
3477         if ((roworiented) && (n == 1)) {
3478           barray = (MatScalar *)v + i * bs2;
3479         } else if ((!roworiented) && (m == 1)) {
3480           barray = (MatScalar *)v + j * bs2;
3481         } else { /* Here a copy is required */
3482           if (roworiented) {
3483             value = v + i * (stepval + bs) * bs + j * bs;
3484           } else {
3485             value = v + j * (stepval + bs) * bs + i * bs;
3486           }
3487           for (ii = 0; ii < bs; ii++, value += stepval) {
3488             for (jj = 0; jj < bs; jj++) *barray++ = *value++;
3489           }
3490           barray -= bs2;
3491         }
3492 
3493         if (in[j] >= cstart && in[j] < cend) {
3494           col = in[j] - cstart;
3495           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
3496         } else if (in[j] < 0) {
3497           continue;
3498         } else {
3499           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
3500           if (mat->was_assembled) {
3501             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
3502 
3503 #if defined(PETSC_USE_DEBUG)
3504   #if defined(PETSC_USE_CTABLE)
3505             {
3506               PetscInt data;
3507               PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
3508               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
3509             }
3510   #else
3511             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
3512   #endif
3513 #endif
3514 #if defined(PETSC_USE_CTABLE)
3515             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
3516             col = (col - 1) / bs;
3517 #else
3518             col = (baij->colmap[in[j]] - 1) / bs;
3519 #endif
3520             if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
3521               PetscCall(MatDisAssemble_MPIBAIJ(mat));
3522               col = in[j];
3523             }
3524           } else col = in[j];
3525           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
3526         }
3527       }
3528     } else {
3529       if (!baij->donotstash) {
3530         if (roworiented) {
3531           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3532         } else {
3533           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3534         }
3535       }
3536     }
3537   }
3538 
3539   /* task normally handled by MatSetValuesBlocked() */
3540   PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
3541   PetscFunctionReturn(PETSC_SUCCESS);
3542 }
3543 
3544 /*@
3545   MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block
3546   CSR format the local rows.
3547 
3548   Collective
3549 
3550   Input Parameters:
3551 + comm - MPI communicator
3552 . bs   - the block size, only a block size of 1 is supported
3553 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
3554 . n    - This value should be the same as the local size used in creating the
3555        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
3556        calculated if N is given) For square matrices n is almost always m.
3557 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3558 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3559 . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
3560 . j    - column indices
3561 - a    - matrix values
3562 
3563   Output Parameter:
3564 . mat - the matrix
3565 
3566   Level: intermediate
3567 
3568   Notes:
3569   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
3570   thus you CANNOT change the matrix entries by changing the values of a[] after you have
3571   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3572 
3573   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3574   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3575   block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3576   with column-major ordering within blocks.
3577 
3578   The `i` and `j` indices are 0 based, and i indices are indices corresponding to the local `j` array.
3579 
3580 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
3581           `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
3582 @*/
3583 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
3584 {
3585   PetscFunctionBegin;
3586   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3587   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
3588   PetscCall(MatCreate(comm, mat));
3589   PetscCall(MatSetSizes(*mat, m, n, M, N));
3590   PetscCall(MatSetType(*mat, MATMPIBAIJ));
3591   PetscCall(MatSetBlockSize(*mat, bs));
3592   PetscCall(MatSetUp(*mat));
3593   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
3594   PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
3595   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
3596   PetscFunctionReturn(PETSC_SUCCESS);
3597 }
3598 
3599 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3600 {
3601   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
3602   PetscInt    *indx;
3603   PetscScalar *values;
3604 
3605   PetscFunctionBegin;
3606   PetscCall(MatGetSize(inmat, &m, &N));
3607   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3608     Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data;
3609     PetscInt    *dnz, *onz, mbs, Nbs, nbs;
3610     PetscInt    *bindx, rmax = a->rmax, j;
3611     PetscMPIInt  rank, size;
3612 
3613     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3614     mbs = m / bs;
3615     Nbs = N / cbs;
3616     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
3617     nbs = n / cbs;
3618 
3619     PetscCall(PetscMalloc1(rmax, &bindx));
3620     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
3621 
3622     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3623     PetscCallMPI(MPI_Comm_rank(comm, &size));
3624     if (rank == size - 1) {
3625       /* Check sum(nbs) = Nbs */
3626       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
3627     }
3628 
3629     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
3630     for (i = 0; i < mbs; i++) {
3631       PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
3632       nnz = nnz / bs;
3633       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
3634       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
3635       PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL));
3636     }
3637     PetscCall(PetscFree(bindx));
3638 
3639     PetscCall(MatCreate(comm, outmat));
3640     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
3641     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
3642     PetscCall(MatSetType(*outmat, MATBAIJ));
3643     PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz));
3644     PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
3645     MatPreallocateEnd(dnz, onz);
3646     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3647   }
3648 
3649   /* numeric phase */
3650   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3651   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
3652 
3653   for (i = 0; i < m; i++) {
3654     PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3655     Ii = i + rstart;
3656     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
3657     PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3658   }
3659   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
3660   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
3661   PetscFunctionReturn(PETSC_SUCCESS);
3662 }
3663