#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I  "petscmat.h"  I*/

#include <petsc/private/hashseti.h>
#include <petscblaslapack.h>
#include <petscsf.h>

PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree2(baij->hd, baij->ht));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE BAIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE
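
/*
   Editor's note (a sketch, not PETSc code): mpihashmat.h above is an
   "X-macro" style header; it uses the TYPE token defined by the including
   file to paste together type-specific names and structs.  The general
   mechanism is the two-level paste, so that TYPE is expanded before pasting:

     #define MyConcat_(a, b) a##b
     #define MyConcat(a, b)  MyConcat_(a, b)

     #define TYPE BAIJ
     // MyConcat(Mat_MPI, TYPE) now expands to Mat_MPIBAIJ
     #undef TYPE

   MyConcat/MyConcat_ are illustrative names only; PETSc provides the same
   idiom as PetscConcat() in petscmacros.h.
*/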

#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
#endif

PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
  PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
  PetscScalar       *va, *vv;
  Vec                vB, vA;
  const PetscScalar *vb;

  PetscFunctionBegin;
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
  PetscCall(MatGetRowMaxAbs(a->A, vA, idx));

  PetscCall(VecGetArrayWrite(vA, &va));
  if (idx) {
    for (i = 0; i < m; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
  PetscCall(PetscMalloc1(m, &idxb));
  PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));

  PetscCall(VecGetArrayWrite(v, &vv));
  PetscCall(VecGetArrayRead(vB, &vb));
  for (i = 0; i < m; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      vv[i] = vb[i];
      if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
    } else {
      vv[i] = va[i];
      if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
    }
  }
  PetscCall(VecRestoreArrayWrite(v, &vv)); /* vv was obtained from v, not vA */
  PetscCall(VecRestoreArrayWrite(vA, &va));
  PetscCall(VecRestoreArrayRead(vB, &vb));
  PetscCall(PetscFree(idxb));
  PetscCall(VecDestroy(&vA));
  PetscCall(VecDestroy(&vB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}
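
/*
   A typical use of the store/retrieve pair above from application code (a
   sketch): freeze the nonzero pattern, snapshot the numerical values, and
   restore them later after the values (but not the pattern) have changed.

     PetscCall(MatSetOption(mat, MAT_NEW_NONZERO_LOCATIONS, PETSC_FALSE));
     PetscCall(MatStoreValues(mat));    // snapshot the current values
     // ... change values with MatSetValues()/MatAssemblyBegin/End(),
     //     keeping the same nonzero pattern ...
     PetscCall(MatRetrieveValues(mat)); // restore the snapshot
*/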

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way, since the
   length of colmap grows with the global number of block columns.
*/
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     nbs = B->nbs, i, bs = mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap));
  for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1));
#else
  PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
  for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
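
/*
   Sketch of how the map built above is consulted (cf. MatSetValues_MPIBAIJ()
   below).  Entries are stored shifted by one so that zero can mean "block
   column not present"; gbcol here stands for the global block column
   in[j]/bs (an illustrative name only):

     PetscInt loc;
   #if defined(PETSC_USE_CTABLE)
     PetscCall(PetscHMapIGetWithDefault(baij->colmap, gbcol + 1, 0, &loc));
     loc--;                              // -1 when the block column is absent
   #else
     loc = baij->colmap[gbcol] - 1;
   #endif
*/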

#define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = aj + ai[brow]; \
    ap   = aa + bs2 * ai[brow]; \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  } while (0)

#define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = bj + bi[brow]; \
    ap   = ba + bs2 * bi[brow]; \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  } while (0)
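
/*
   Both macros above locate the block column with a truncated binary search:
   bisect while the window holds more than three entries, then finish with a
   linear scan, which is cheaper on tiny windows.  A standalone sketch of the
   same search (FindBlockCol is illustrative, not a PETSc routine):

     static inline PetscInt FindBlockCol(const PetscInt rp[], PetscInt nrow, PetscInt bcol)
     {
       PetscInt low = 0, high = nrow, t;
       while (high - low > 3) {
         t = (low + high) / 2;
         if (rp[t] > bcol) high = t;
         else low = t;
       }
       for (t = low; t < high; t++) {
         if (rp[t] > bcol) break;
         if (rp[t] == bcol) return t; // found
       }
       return -1; // not present: the macros fall through to insertion
     }
*/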

PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  MatScalar    value;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some variables required in the macros */
  Mat          A     = baij->A;
  Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)(A)->data;
  PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar   *aa = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j = 0; j < n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)(B)->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else {
              PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
              col += in[j] % bs;
            }
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
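
/*
   From user code the routine above is reached through MatSetValues() with
   global indices; entries destined for rows owned by other processes are
   stashed and shipped during assembly.  A minimal sketch:

     PetscInt    row = ..., col = ...; // global indices, possibly off-process
     PetscScalar val = 1.0;
     PetscCall(MatSetValues(mat, 1, &row, 1, &col, &val, ADD_VALUES));
     PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); // communicates the stash
     PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
*/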

static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
    by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ *)mat->data;
  const PetscScalar *value;
  MatScalar         *barray      = baij->barray;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* a copy is not required when a single block is passed in the oriented direction (n == 1 row-oriented, or m == 1 column-oriented) */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i * (stepval + bs) + j) * bs;
          } else {
            value = v + (j * (stepval + bs) + i) * bs;
          }
          for (ii = 0; ii < bs; ii++, value += bs + stepval) {
            for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
  #endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = (col - 1) / bs;
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

#define HASH_KEY             0.6180339887
#define HASH(size, key, tmp) (tmp = (key)*HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
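
/*
   HASH() is multiplicative hashing: scale the key by the fractional part of
   the golden ratio (HASH_KEY), keep the fractional part of the product, and
   scale it to the table size.  A worked example (values rounded): with
   size = 97 and key = 12345,

     tmp      = 12345 * 0.6180339887 = 7629.6296...
     fraction = 0.6296...
     h1       = (PetscInt)(97 * 0.6296...) = 61
*/
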
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
  PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
  PetscReal    tmp;
  MatScalar  **HD       = baij->hd, value;
  PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j = 0; j < n; j++) {
        col = in[j];
        if (roworiented) value = v[i * n + j];
        else value = v[i + j * m];
        /* look up the block in the hash table */
        key = (row / bs) * Nbs + (col / bs) + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
              ;
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
                ;
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
            ;
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
              ;
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
        else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col;
  PetscInt           rstart = baij->rstartbs;
  PetscInt           rend = baij->rendbs, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2; /* rendbs: block-row bound, consistent with rstart */
  PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
  PetscReal          tmp;
  MatScalar        **HD = baij->hd, *baij_a;
  const PetscScalar *v_t, *value;
  PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
      PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    }
    row = im[i];
    v_t = v + i * nbs2;
    if (row >= rstart && row < rend) {
      for (j = 0; j < n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row * Nbs + col + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          total_ct++;
          insert_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
              ;
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
                ;
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
            ;
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
              ;
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t += bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
            }
          }
        } else {
          value = v + j * (stepval + bs) * bs + i * bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
          data--;
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
  PetscReal    sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    PetscCall(MatNorm(baij->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v  = bmat->a;
      nz = bmat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = baij->garray, cstart = baij->cstartbs; /* first block column of the diagonal part */
      PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N, &tmp2));
      v  = amat->a;
      jj = amat->j;
      for (i = 0; i < amat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * (cstart + *jj) + j; /* column index */
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      v  = bmat->a;
      jj = bmat->j;
      for (i = 0; i < bmat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * garray[*jj] + j;
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = 0.0;
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      PetscCall(PetscMalloc1(bs, &sums));
      sum = 0.0;
      for (j = 0; j < amat->mbs; j++) {
        for (row = 0; row < bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2 * amat->i[j];
        nz = amat->i[j + 1] - amat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        v  = bmat->a + bs2 * bmat->i[j];
        nz = bmat->i[j + 1] - bmat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        for (row = 0; row < bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscFree(sums));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Creates and fills the hash table; the table is created only once.
  If new entries need to be added to the matrix, the hash table must
  be destroyed and recreated.
*/
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          A = baij->A, B = baij->B;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
  PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
  PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
  PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
  PetscInt    *HT, key;
  MatScalar  **HD;
  PetscReal    tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct = 0, max = 0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS);

  baij->ht_size = (PetscInt)(factor * nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
  HD = baij->hd;
  HT = baij->ht;

  /* Loop Over A */
  for (i = 0; i < a->mbs; i++) {
    for (j = ai[i]; j < ai[i + 1]; j++) {
      row = i + rstart;
      col = aj[j] + cstart;

      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = a->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i = 0; i < b->mbs; i++) {
    for (j = bi[i]; j < bi[i + 1]; j++) {
      row = i + rstart;
      col = garray[bj[j]];
      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = b->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i = 0, j = 0; i < ht_size; i++) {
    if (HT[i]) j++;
  }
  PetscCall(PetscInfo(mat, "Average Search = %5.2g, max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max));
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
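
/*
   Lookups in MatSetValues_MPIBAIJ_HT()/MatSetValuesBlocked_MPIBAIJ_HT()
   mirror the linear probing used to fill the table above: start at
   HASH(ht_size,key,tmp) and scan forward, wrapping once at the end of the
   table.  A compact sketch of the probe loop (illustrative; it uses a
   modulus where the routines above use two explicit scans):

     idx = h1;
     while (HT[idx] != key) {
       idx = (idx + 1) % ht_size;
       PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Key not in hash table");
     }
     // HD[idx] now points at the bs2 values of the block (row,col)
*/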

PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     nstash, reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
  PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt    *row, *col;
  PetscBool    r1, r2, r3, other_disassembled;
  MatScalar   *val;
  PetscMPIInt  n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       temporarily clear the roworiented flags, and restore the original flags
       after the MatSetValuesBlocked() calls */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
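
/*
   The begin/end split above exists so that the communication of stashed
   off-process entries can be overlapped with unrelated work; a usage sketch:

     PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
     // ... computation that does not touch mat ...
     PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
*/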

extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer);
#include <petscdraw.h>
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ      *baij = (Mat_MPIBAIJ *)mat->data;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, (double)info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqBAIJ *Aloc;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)(A->data))->A, matname));
      PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
  Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt        header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l;
  PetscInt64      nz, hnz;
  PetscInt       *rowlens, *colidxs;
  PetscScalar    *matvals;
  PetscMPIInt     rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs * bs * (A->nz + B->nz);

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (cnt = 0, i = 0; i < A->mbs; i++)
    for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
    }
  }
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}
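
/*
   A sketch of reaching the binary path above from user code (the file name
   is illustrative):

     PetscViewer viewer;
     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, "mat.dat", FILE_MODE_WRITE, &viewer));
     PetscCall(MatView(mat, viewer)); // dispatches to MatView_MPIBAIJ_Binary()
     PetscCall(PetscViewerDestroy(&viewer));
*/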

PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii || isdraw || issocket) {
    PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  PetscInt     nt;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
  PetscCall(VecGetLocalSize(yy, &nt));
  PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy");
  /* overlap communication of the ghost values of xx with the diagonal-block product */
  PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->mult)(a->A, xx, yy));
  PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  /* add the off-diagonal-block contribution using the received ghost values */
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
1244   PetscFunctionReturn(PETSC_SUCCESS);
1245 }
1246 
1247 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1248 {
1249   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1250 
1251   PetscFunctionBegin;
1252   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1253   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1254   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1255   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1256   PetscFunctionReturn(PETSC_SUCCESS);
1257 }
1258 
1259 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
1260 {
1261   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1262 
1263   PetscFunctionBegin;
1264   /* do nondiagonal part */
1265   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1266   /* do local part */
1267   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1268   /* add partial results together */
1269   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1270   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1271   PetscFunctionReturn(PETSC_SUCCESS);
1272 }
1273 
1274 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1275 {
1276   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1277 
1278   PetscFunctionBegin;
1279   /* do nondiagonal part */
1280   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1281   /* do local part */
1282   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1283   /* add partial results together */
1284   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1285   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1286   PetscFunctionReturn(PETSC_SUCCESS);
1287 }
1288 
1289 /*
1290   This only works correctly for square matrices where the subblock A->A is the
1291    diagonal block
1292 */
1293 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
1294 {
1295   PetscFunctionBegin;
1296   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1297   PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
1298   PetscFunctionReturn(PETSC_SUCCESS);
1299 }
1300 
1301 PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
1302 {
1303   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1304 
1305   PetscFunctionBegin;
1306   PetscCall(MatScale(a->A, aa));
1307   PetscCall(MatScale(a->B, aa));
1308   PetscFunctionReturn(PETSC_SUCCESS);
1309 }
1310 
1311 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1312 {
1313   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
1314   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1315   PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
1316   PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
1317   PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;
1318 
1319   PetscFunctionBegin;
1320   PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
1321   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1322   mat->getrowactive = PETSC_TRUE;
1323 
1324   if (!mat->rowvalues && (idx || v)) {
1325     /*
1326         allocate enough space to hold information from the longest row.
1327     */
1328     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
1329     PetscInt     max = 1, mbs = mat->mbs, tmp;
1330     for (i = 0; i < mbs; i++) {
1331       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1332       if (max < tmp) max = tmp;
1333     }
1334     PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
1335   }
1336   lrow = row - brstart;
1337 
1338   pvA = &vworkA;
1339   pcA = &cworkA;
1340   pvB = &vworkB;
1341   pcB = &cworkB;
1342   if (!v) {
1343     pvA = NULL;
1344     pvB = NULL;
1345   }
1346   if (!idx) {
1347     pcA = NULL;
1348     if (!v) pcB = NULL;
1349   }
1350   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1351   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1352   nztot = nzA + nzB;
1353 
1354   cmap = mat->garray;
1355   if (v || idx) {
1356     if (nztot) {
1357       /* Sort by increasing column numbers, assuming A and B already sorted */
1358       PetscInt imark = -1;
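      /* imark counts the leading entries of B whose global block column lies before the
         diagonal block; the merged row is then [B-left | A | B-right], which is sorted because
         A and B are each sorted and all of A's columns fall inside the diagonal block. */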
1359       if (v) {
1360         *v = v_p = mat->rowvalues;
1361         for (i = 0; i < nzB; i++) {
1362           if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
1363           else break;
1364         }
1365         imark = i;
1366         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1367         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1368       }
1369       if (idx) {
1370         *idx = idx_p = mat->rowindices;
1371         if (imark > -1) {
1372           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1373         } else {
1374           for (i = 0; i < nzB; i++) {
1375             if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1376             else break;
1377           }
1378           imark = i;
1379         }
1380         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
1381         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1382       }
1383     } else {
1384       if (idx) *idx = NULL;
1385       if (v) *v = NULL;
1386     }
1387   }
1388   *nz = nztot;
1389   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1390   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1391   PetscFunctionReturn(PETSC_SUCCESS);
1392 }
1393 
1394 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1395 {
1396   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1397 
1398   PetscFunctionBegin;
1399   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
1400   baij->getrowactive = PETSC_FALSE;
1401   PetscFunctionReturn(PETSC_SUCCESS);
1402 }
1403 
1404 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1405 {
1406   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1407 
1408   PetscFunctionBegin;
1409   PetscCall(MatZeroEntries(l->A));
1410   PetscCall(MatZeroEntries(l->B));
1411   PetscFunctionReturn(PETSC_SUCCESS);
1412 }
1413 
1414 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1415 {
1416   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
1417   Mat            A = a->A, B = a->B;
1418   PetscLogDouble isend[5], irecv[5];
1419 
1420   PetscFunctionBegin;
1421   info->block_size = (PetscReal)matin->rmap->bs;
1422 
1423   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1424 
1425   isend[0] = info->nz_used;
1426   isend[1] = info->nz_allocated;
1427   isend[2] = info->nz_unneeded;
1428   isend[3] = info->memory;
1429   isend[4] = info->mallocs;
1430 
1431   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1432 
1433   isend[0] += info->nz_used;
1434   isend[1] += info->nz_allocated;
1435   isend[2] += info->nz_unneeded;
1436   isend[3] += info->memory;
1437   isend[4] += info->mallocs;
1438 
1439   if (flag == MAT_LOCAL) {
1440     info->nz_used      = isend[0];
1441     info->nz_allocated = isend[1];
1442     info->nz_unneeded  = isend[2];
1443     info->memory       = isend[3];
1444     info->mallocs      = isend[4];
1445   } else if (flag == MAT_GLOBAL_MAX) {
1446     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1447 
1448     info->nz_used      = irecv[0];
1449     info->nz_allocated = irecv[1];
1450     info->nz_unneeded  = irecv[2];
1451     info->memory       = irecv[3];
1452     info->mallocs      = irecv[4];
1453   } else if (flag == MAT_GLOBAL_SUM) {
1454     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1455 
1456     info->nz_used      = irecv[0];
1457     info->nz_allocated = irecv[1];
1458     info->nz_unneeded  = irecv[2];
1459     info->memory       = irecv[3];
1460     info->mallocs      = irecv[4];
1461   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1462   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1463   info->fill_ratio_needed = 0;
1464   info->factor_mallocs    = 0;
1465   PetscFunctionReturn(PETSC_SUCCESS);
1466 }
1467 
1468 PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
1469 {
1470   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1471 
1472   PetscFunctionBegin;
1473   switch (op) {
1474   case MAT_NEW_NONZERO_LOCATIONS:
1475   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1476   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1477   case MAT_KEEP_NONZERO_PATTERN:
1478   case MAT_NEW_NONZERO_LOCATION_ERR:
1479     MatCheckPreallocated(A, 1);
1480     PetscCall(MatSetOption(a->A, op, flg));
1481     PetscCall(MatSetOption(a->B, op, flg));
1482     break;
1483   case MAT_ROW_ORIENTED:
1484     MatCheckPreallocated(A, 1);
1485     a->roworiented = flg;
1486 
1487     PetscCall(MatSetOption(a->A, op, flg));
1488     PetscCall(MatSetOption(a->B, op, flg));
1489     break;
1490   case MAT_FORCE_DIAGONAL_ENTRIES:
1491   case MAT_SORTED_FULL:
1492     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1493     break;
1494   case MAT_IGNORE_OFF_PROC_ENTRIES:
1495     a->donotstash = flg;
1496     break;
1497   case MAT_USE_HASH_TABLE:
1498     a->ht_flag = flg;
1499     a->ht_fact = 1.39;
1500     break;
1501   case MAT_SYMMETRIC:
1502   case MAT_STRUCTURALLY_SYMMETRIC:
1503   case MAT_HERMITIAN:
1504   case MAT_SUBMAT_SINGLEIS:
1505   case MAT_SYMMETRY_ETERNAL:
1506   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1507   case MAT_SPD_ETERNAL:
1508     /* if the diagonal block is square, it inherits some of the properties above */
1509     break;
1510   default:
1511     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op);
1512   }
1513   PetscFunctionReturn(PETSC_SUCCESS);
1514 }
1515 
1516 PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout)
1517 {
1518   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
1519   Mat_SeqBAIJ *Aloc;
1520   Mat          B;
1521   PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
1522   PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
1523   MatScalar   *a;
1524 
1525   PetscFunctionBegin;
1526   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1527   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1528     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1529     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1530     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1531     /* Do not know preallocation information, but must set block size */
1532     PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
1533   } else {
1534     B = *matout;
1535   }
1536 
1537   /* copy over the A part */
1538   Aloc = (Mat_SeqBAIJ *)baij->A->data;
1539   ai   = Aloc->i;
1540   aj   = Aloc->j;
1541   a    = Aloc->a;
1542   PetscCall(PetscMalloc1(bs, &rvals));
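  /* rvals holds the bs global point-row indices of block row i of A; each bs-long column
     slice of a block (one global column of A) is inserted as one row of B, so that
     B(col, rvals[0..bs-1]) = A(rvals[0..bs-1], col), i.e. the transpose. */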
1543 
1544   for (i = 0; i < mbs; i++) {
1545     rvals[0] = bs * (baij->rstartbs + i);
1546     for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1547     for (j = ai[i]; j < ai[i + 1]; j++) {
1548       col = (baij->cstartbs + aj[j]) * bs;
1549       for (k = 0; k < bs; k++) {
1550         PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
1551 
1552         col++;
1553         a += bs;
1554       }
1555     }
1556   }
1557   /* copy over the B part */
1558   Aloc = (Mat_SeqBAIJ *)baij->B->data;
1559   ai   = Aloc->i;
1560   aj   = Aloc->j;
1561   a    = Aloc->a;
1562   for (i = 0; i < mbs; i++) {
1563     rvals[0] = bs * (baij->rstartbs + i);
1564     for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1565     for (j = ai[i]; j < ai[i + 1]; j++) {
1566       col = baij->garray[aj[j]] * bs;
1567       for (k = 0; k < bs; k++) {
1568         PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
1569         col++;
1570         a += bs;
1571       }
1572     }
1573   }
1574   PetscCall(PetscFree(rvals));
1575   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1576   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1577 
1578   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
1579   else PetscCall(MatHeaderMerge(A, &B));
1580   PetscFunctionReturn(PETSC_SUCCESS);
1581 }
1582 
1583 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr)
1584 {
1585   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1586   Mat          a = baij->A, b = baij->B;
1587   PetscInt     s1, s2, s3;
1588 
1589   PetscFunctionBegin;
1590   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1591   if (rr) {
1592     PetscCall(VecGetLocalSize(rr, &s1));
1593     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1594     /* Overlap communication with computation. */
1595     PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1596   }
1597   if (ll) {
1598     PetscCall(VecGetLocalSize(ll, &s1));
1599     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1600     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1601   }
1602   /* scale the diagonal block */
1603   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1604 
1605   if (rr) {
1606     /* Do a scatter end and then right scale the off-diagonal block */
1607     PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1608     PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1609   }
1610   PetscFunctionReturn(PETSC_SUCCESS);
1611 }
1612 
1613 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
1614 {
1615   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1616   PetscInt    *lrows;
1617   PetscInt     r, len;
1618   PetscBool    cong;
1619 
1620   PetscFunctionBegin;
1621   /* get locally owned rows */
1622   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
1623   /* fix right hand side if needed */
1624   if (x && b) {
1625     const PetscScalar *xx;
1626     PetscScalar       *bb;
1627 
1628     PetscCall(VecGetArrayRead(x, &xx));
1629     PetscCall(VecGetArray(b, &bb));
1630     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
1631     PetscCall(VecRestoreArrayRead(x, &xx));
1632     PetscCall(VecRestoreArray(b, &bb));
1633   }
1634 
1635   /* actually zap the local rows */
1636   /*
1637         Zero the required rows. If the "diagonal block" of the matrix
1638      is square and the user wishes to set the diagonal, we use separate
1639      code so that MatSetValues() is not called for each diagonal entry,
1640      which would allocate new memory each time, triggering many mallocs
1641      and slowing things down.
1642   */
1643   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1644   PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
1645   PetscCall(MatHasCongruentLayouts(A, &cong));
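  /* With congruent row/column layouts the diagonal entry of each zeroed row lives in the
     diagonal block l->A and can be set there directly; otherwise fall back to MatSetValues(). */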
1646   if ((diag != 0.0) && cong) {
1647     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
1648   } else if (diag != 0.0) {
1649     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
1650     PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1651        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1652     for (r = 0; r < len; ++r) {
1653       const PetscInt row = lrows[r] + A->rmap->rstart;
1654       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
1655     }
1656     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1657     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1658   } else {
1659     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
1660   }
1661   PetscCall(PetscFree(lrows));
1662 
1663   /* only change matrix nonzero state if pattern was allowed to be changed */
1664   if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) {
1665     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1666     PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1667   }
1668   PetscFunctionReturn(PETSC_SUCCESS);
1669 }
1670 
1671 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
1672 {
1673   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
1674   PetscMPIInt        n = A->rmap->n, p = 0;
1675   PetscInt           i, j, k, r, len = 0, row, col, count;
1676   PetscInt          *lrows, *owners = A->rmap->range;
1677   PetscSFNode       *rrows;
1678   PetscSF            sf;
1679   const PetscScalar *xx;
1680   PetscScalar       *bb, *mask;
1681   Vec                xmask, lmask;
1682   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
1683   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1684   PetscScalar       *aa;
1685 
1686   PetscFunctionBegin;
1687   /* Create SF where leaves are input rows and roots are owned rows */
1688   PetscCall(PetscMalloc1(n, &lrows));
1689   for (r = 0; r < n; ++r) lrows[r] = -1;
1690   PetscCall(PetscMalloc1(N, &rrows));
1691   for (r = 0; r < N; ++r) {
1692     const PetscInt idx = rows[r];
1693     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
1694     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
1695       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
1696     }
1697     rrows[r].rank  = p;
1698     rrows[r].index = rows[r] - owners[p];
1699   }
1700   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1701   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
1702   /* Collect flags for rows to be zeroed */
1703   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
1704   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
1705   PetscCall(PetscSFDestroy(&sf));
1706   /* Compress and put in row numbers */
1707   for (r = 0; r < n; ++r)
1708     if (lrows[r] >= 0) lrows[len++] = r;
1709   /* zero diagonal part of matrix */
1710   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
1711   /* handle the off-diagonal part of the matrix */
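  /* Scatter a vector with ones in the zeroed local rows into lmask so that every process
     learns which of its ghost (off-diagonal) columns correspond to globally zeroed rows. */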
1712   PetscCall(MatCreateVecs(A, &xmask, NULL));
1713   PetscCall(VecDuplicate(l->lvec, &lmask));
1714   PetscCall(VecGetArray(xmask, &bb));
1715   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
1716   PetscCall(VecRestoreArray(xmask, &bb));
1717   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
1718   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
1719   PetscCall(VecDestroy(&xmask));
1720   if (x) {
1721     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
1722     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
1723     PetscCall(VecGetArrayRead(l->lvec, &xx));
1724     PetscCall(VecGetArray(b, &bb));
1725   }
1726   PetscCall(VecGetArray(lmask, &mask));
1727   /* remove zeroed rows of the off-diagonal matrix */
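  /* Each bs x bs block is stored column-major, so the entries of point row (row % bs) within
     a block row sit bs apart in baij->a; zero them all by striding through the values. */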
1728   for (i = 0; i < len; ++i) {
1729     row   = lrows[i];
1730     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
1731     aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
1732     for (k = 0; k < count; ++k) {
1733       aa[0] = 0.0;
1734       aa += bs;
1735     }
1736   }
1737   /* loop over all elements of the off-process part of the matrix, zeroing the removed columns */
1738   for (i = 0; i < l->B->rmap->N; ++i) {
1739     row = i / bs;
1740     for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
1741       for (k = 0; k < bs; ++k) {
1742         col = bs * baij->j[j] + k;
1743         if (PetscAbsScalar(mask[col])) {
1744           aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k;
1745           if (x) bb[i] -= aa[0] * xx[col];
1746           aa[0] = 0.0;
1747         }
1748       }
1749     }
1750   }
1751   if (x) {
1752     PetscCall(VecRestoreArray(b, &bb));
1753     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1754   }
1755   PetscCall(VecRestoreArray(lmask, &mask));
1756   PetscCall(VecDestroy(&lmask));
1757   PetscCall(PetscFree(lrows));
1758 
1759   /* only change matrix nonzero state if pattern was allowed to be changed */
1760   if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) {
1761     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1762     PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1763   }
1764   PetscFunctionReturn(PETSC_SUCCESS);
1765 }
1766 
1767 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1768 {
1769   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1770 
1771   PetscFunctionBegin;
1772   PetscCall(MatSetUnfactored(a->A));
1773   PetscFunctionReturn(PETSC_SUCCESS);
1774 }
1775 
1776 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);
1777 
1778 PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag)
1779 {
1780   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
1781   Mat          a, b, c, d;
1782   PetscBool    flg;
1783 
1784   PetscFunctionBegin;
1785   a = matA->A;
1786   b = matA->B;
1787   c = matB->A;
1788   d = matB->B;
1789 
1790   PetscCall(MatEqual(a, c, &flg));
1791   if (flg) PetscCall(MatEqual(b, d, &flg));
1792   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1793   PetscFunctionReturn(PETSC_SUCCESS);
1794 }
1795 
1796 PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str)
1797 {
1798   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1799   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
1800 
1801   PetscFunctionBegin;
1802   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1803   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1804     PetscCall(MatCopy_Basic(A, B, str));
1805   } else {
1806     PetscCall(MatCopy(a->A, b->A, str));
1807     PetscCall(MatCopy(a->B, b->B, str));
1808   }
1809   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1810   PetscFunctionReturn(PETSC_SUCCESS);
1811 }
1812 
1813 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
1814 {
1815   PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs;
1816   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
1817   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
1818 
1819   PetscFunctionBegin;
1820   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
1821   PetscFunctionReturn(PETSC_SUCCESS);
1822 }
1823 
1824 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
1825 {
1826   Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
1827   PetscBLASInt bnz, one                         = 1;
1828   Mat_SeqBAIJ *x, *y;
1829   PetscInt     bs2 = Y->rmap->bs * Y->rmap->bs;
1830 
1831   PetscFunctionBegin;
1832   if (str == SAME_NONZERO_PATTERN) {
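    /* Identical nonzero patterns mean the stored values of X and Y align one-to-one, so
       Y += a*X reduces to a flat BLAS axpy over the nz*bs2 stored scalars of each part. */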
1833     PetscScalar alpha = a;
1834     x                 = (Mat_SeqBAIJ *)xx->A->data;
1835     y                 = (Mat_SeqBAIJ *)yy->A->data;
1836     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
1837     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
1838     x = (Mat_SeqBAIJ *)xx->B->data;
1839     y = (Mat_SeqBAIJ *)yy->B->data;
1840     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
1841     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
1842     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1843   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1844     PetscCall(MatAXPY_Basic(Y, a, X, str));
1845   } else {
1846     Mat       B;
1847     PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
1848     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
1849     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
1850     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
1851     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
1852     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
1853     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
1854     PetscCall(MatSetType(B, MATMPIBAIJ));
1855     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d));
1856     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
1857     PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
1858     /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
1859     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
1860     PetscCall(MatHeaderMerge(Y, &B));
1861     PetscCall(PetscFree(nnz_d));
1862     PetscCall(PetscFree(nnz_o));
1863   }
1864   PetscFunctionReturn(PETSC_SUCCESS);
1865 }
1866 
1867 PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
1868 {
1869   PetscFunctionBegin;
1870   if (PetscDefined(USE_COMPLEX)) {
1871     Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;
1872 
1873     PetscCall(MatConjugate_SeqBAIJ(a->A));
1874     PetscCall(MatConjugate_SeqBAIJ(a->B));
1875   }
1876   PetscFunctionReturn(PETSC_SUCCESS);
1877 }
1878 
1879 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1880 {
1881   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1882 
1883   PetscFunctionBegin;
1884   PetscCall(MatRealPart(a->A));
1885   PetscCall(MatRealPart(a->B));
1886   PetscFunctionReturn(PETSC_SUCCESS);
1887 }
1888 
1889 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1890 {
1891   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1892 
1893   PetscFunctionBegin;
1894   PetscCall(MatImaginaryPart(a->A));
1895   PetscCall(MatImaginaryPart(a->B));
1896   PetscFunctionReturn(PETSC_SUCCESS);
1897 }
1898 
1899 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1900 {
1901   IS       iscol_local;
1902   PetscInt csize;
1903 
1904   PetscFunctionBegin;
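  /* The gathered column IS is stashed on the new matrix under the name "ISAllGather" so a
     later MAT_REUSE_MATRIX call can retrieve it instead of gathering it again. */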
1905   PetscCall(ISGetLocalSize(iscol, &csize));
1906   if (call == MAT_REUSE_MATRIX) {
1907     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1908     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1909   } else {
1910     PetscCall(ISAllGather(iscol, &iscol_local));
1911   }
1912   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat));
1913   if (call == MAT_INITIAL_MATRIX) {
1914     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1915     PetscCall(ISDestroy(&iscol_local));
1916   }
1917   PetscFunctionReturn(PETSC_SUCCESS);
1918 }
1919 
1920 /*
1921   Not great since it makes two copies of the submatrix, first a sequential SeqBAIJ
1922   per process and then the end result by concatenating the local matrices.
1923   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1924   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
1925 */
1926 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
1927 {
1928   PetscMPIInt  rank, size;
1929   PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
1930   PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
1931   Mat          M, Mreuse;
1932   MatScalar   *vwork, *aa;
1933   MPI_Comm     comm;
1934   IS           isrow_new, iscol_new;
1935   Mat_SeqBAIJ *aij;
1936 
1937   PetscFunctionBegin;
1938   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
1939   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1940   PetscCallMPI(MPI_Comm_size(comm, &size));
1941   /* The compression and expansion should be avoided. It does not flag
1942      errors and may change the indices, hence it is buggy */
1943   PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
1944   if (isrow == iscol) {
1945     iscol_new = isrow_new;
1946     PetscCall(PetscObjectReference((PetscObject)iscol_new));
1947   } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));
1948 
1949   if (call == MAT_REUSE_MATRIX) {
1950     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
1951     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1952     PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse));
1953   } else {
1954     PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse));
1955   }
1956   PetscCall(ISDestroy(&isrow_new));
1957   PetscCall(ISDestroy(&iscol_new));
1958   /*
1959       m - number of local rows
1960       n - number of columns (same on all processors)
1961       rstart - first row in new global matrix generated
1962   */
1963   PetscCall(MatGetBlockSize(mat, &bs));
1964   PetscCall(MatGetSize(Mreuse, &m, &n));
1965   m = m / bs;
1966   n = n / bs;
1967 
1968   if (call == MAT_INITIAL_MATRIX) {
1969     aij = (Mat_SeqBAIJ *)(Mreuse)->data;
1970     ii  = aij->i;
1971     jj  = aij->j;
1972 
1973     /*
1974         Determine the number of non-zeros in the diagonal and off-diagonal
1975         portions of the matrix in order to do correct preallocation
1976     */
1977 
1978     /* first get start and end of "diagonal" columns */
1979     if (csize == PETSC_DECIDE) {
1980       PetscCall(ISGetSize(isrow, &mglobal));
1981       if (mglobal == n * bs) { /* square matrix */
1982         nlocal = m;
1983       } else {
1984         nlocal = n / size + ((n % size) > rank);
1985       }
1986     } else {
1987       nlocal = csize / bs;
1988     }
1989     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
1990     rstart = rend - nlocal;
1991     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
1992 
1993     /* next, compute all the lengths */
1994     PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
1995     for (i = 0; i < m; i++) {
1996       jend = ii[i + 1] - ii[i];
1997       olen = 0;
1998       dlen = 0;
1999       for (j = 0; j < jend; j++) {
2000         if (*jj < rstart || *jj >= rend) olen++;
2001         else dlen++;
2002         jj++;
2003       }
2004       olens[i] = olen;
2005       dlens[i] = dlen;
2006     }
2007     PetscCall(MatCreate(comm, &M));
2008     PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
2009     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
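    /* M has the same type as mat (BAIJ or SBAIJ, per the note above); both preallocation
       calls below are attempted and only the one matching the actual type takes effect. */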
2010     PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
2011     PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
2012     PetscCall(PetscFree2(dlens, olens));
2013   } else {
2014     PetscInt ml, nl;
2015 
2016     M = *newmat;
2017     PetscCall(MatGetLocalSize(M, &ml, &nl));
2018     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
2019     PetscCall(MatZeroEntries(M));
2020     /*
2021          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2022        rather than the slower MatSetValues().
2023     */
2024     M->was_assembled = PETSC_TRUE;
2025     M->assembled     = PETSC_FALSE;
2026   }
2027   PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
2028   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
2029   aij = (Mat_SeqBAIJ *)(Mreuse)->data;
2030   ii  = aij->i;
2031   jj  = aij->j;
2032   aa  = aij->a;
2033   for (i = 0; i < m; i++) {
2034     row   = rstart / bs + i;
2035     nz    = ii[i + 1] - ii[i];
2036     cwork = jj;
2037     jj += nz;
2038     vwork = aa;
2039     aa += nz * bs * bs;
2040     PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
2041   }
2042 
2043   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
2044   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
2045   *newmat = M;
2046 
2047   /* save submatrix used in processor for next request */
2048   if (call == MAT_INITIAL_MATRIX) {
2049     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
2050     PetscCall(PetscObjectDereference((PetscObject)Mreuse));
2051   }
2052   PetscFunctionReturn(PETSC_SUCCESS);
2053 }
2054 
2055 PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B)
2056 {
2057   MPI_Comm        comm, pcomm;
2058   PetscInt        clocal_size, nrows;
2059   const PetscInt *rows;
2060   PetscMPIInt     size;
2061   IS              crowp, lcolp;
2062 
2063   PetscFunctionBegin;
2064   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2065   /* make a collective version of 'rowp' */
2066   PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
2067   if (pcomm == comm) {
2068     crowp = rowp;
2069   } else {
2070     PetscCall(ISGetSize(rowp, &nrows));
2071     PetscCall(ISGetIndices(rowp, &rows));
2072     PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
2073     PetscCall(ISRestoreIndices(rowp, &rows));
2074   }
2075   PetscCall(ISSetPermutation(crowp));
2076   /* make a local version of 'colp' */
2077   PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
2078   PetscCallMPI(MPI_Comm_size(pcomm, &size));
2079   if (size == 1) {
2080     lcolp = colp;
2081   } else {
2082     PetscCall(ISAllGather(colp, &lcolp));
2083   }
2084   PetscCall(ISSetPermutation(lcolp));
2085   /* now we just get the submatrix */
2086   PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
2087   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B));
2088   /* clean up */
2089   if (pcomm != comm) PetscCall(ISDestroy(&crowp));
2090   if (size > 1) PetscCall(ISDestroy(&lcolp));
2091   PetscFunctionReturn(PETSC_SUCCESS);
2092 }
2093 
2094 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
2095 {
2096   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
2097   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
2098 
2099   PetscFunctionBegin;
2100   if (nghosts) *nghosts = B->nbs;
2101   if (ghosts) *ghosts = baij->garray;
2102   PetscFunctionReturn(PETSC_SUCCESS);
2103 }
2104 
2105 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat)
2106 {
2107   Mat          B;
2108   Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
2109   Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
2110   Mat_SeqAIJ  *b;
2111   PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
2112   PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
2113   PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;
2114 
2115   PetscFunctionBegin;
2116   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2117   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2118 
2119   /*   Tell every processor the number of nonzeros per row  */
2120   PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
2121   for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
2122   PetscCall(PetscMalloc1(2 * size, &recvcounts));
2123   displs = recvcounts + size;
2124   for (i = 0; i < size; i++) {
2125     recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs;
2126     displs[i]     = A->rmap->range[i] / bs;
2127   }
2128   PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
2129   /* Create the sequential matrix of the same type as the local block diagonal  */
2130   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
2131   PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
2132   PetscCall(MatSetType(B, MATSEQAIJ));
2133   PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
2134   b = (Mat_SeqAIJ *)B->data;
2135 
2136   /*     Copy my part of matrix column indices over  */
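  /* Each process writes its own column indices directly into its slice of b->j; the
     MPI_Allgatherv below (with MPI_IN_PLACE) then fills in the slices of the other ranks. */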
2137   sendcount  = ad->nz + bd->nz;
2138   jsendbuf   = b->j + b->i[rstarts[rank] / bs];
2139   a_jsendbuf = ad->j;
2140   b_jsendbuf = bd->j;
2141   n          = A->rmap->rend / bs - A->rmap->rstart / bs;
2142   cnt        = 0;
2143   for (i = 0; i < n; i++) {
2144     /* put in lower diagonal portion */
2145     m = bd->i[i + 1] - bd->i[i];
2146     while (m > 0) {
2147       /* is it above diagonal (in bd (compressed) numbering) */
2148       if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
2149       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2150       m--;
2151     }
2152 
2153     /* put in diagonal portion */
2154     for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;
2155 
2156     /* put in upper diagonal portion */
2157     while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
2158   }
2159   PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);
2160 
2161   /*  Gather all column indices to all processors  */
2162   for (i = 0; i < size; i++) {
2163     recvcounts[i] = 0;
2164     for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
2165   }
2166   displs[0] = 0;
2167   for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
2168   PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
2169   /*  Assemble the matrix into usable form (note numerical values not yet set)  */
2170   /* set the b->ilen (length of each row) values */
2171   PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
2172   /* set the b->i indices */
2173   b->i[0] = 0;
2174   for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
2175   PetscCall(PetscFree(lens));
2176   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2177   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2178   PetscCall(PetscFree(recvcounts));
2179 
2180   PetscCall(MatPropagateSymmetryOptions(A, B));
2181   *newmat = B;
2182   PetscFunctionReturn(PETSC_SUCCESS);
2183 }
2184 
2185 PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
2186 {
2187   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
2188   Vec          bb1 = NULL;
2189 
2190   PetscFunctionBegin;
2191   if (flag == SOR_APPLY_UPPER) {
2192     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2193     PetscFunctionReturn(PETSC_SUCCESS);
2194   }
2195 
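  /* A work vector bb1 is needed unless a single sweep starts from a zero initial guess;
     note that ~flag & SOR_ZERO_INITIAL_GUESS is nonzero exactly when that bit is unset. */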
2196   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));
2197 
2198   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2199     if (flag & SOR_ZERO_INITIAL_GUESS) {
2200       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2201       its--;
2202     }
2203 
2204     while (its--) {
2205       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2206       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2207 
2208       /* update rhs: bb1 = bb - B*x */
2209       PetscCall(VecScale(mat->lvec, -1.0));
2210       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2211 
2212       /* local sweep */
2213       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
2214     }
2215   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2216     if (flag & SOR_ZERO_INITIAL_GUESS) {
2217       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2218       its--;
2219     }
2220     while (its--) {
2221       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2222       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2223 
2224       /* update rhs: bb1 = bb - B*x */
2225       PetscCall(VecScale(mat->lvec, -1.0));
2226       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2227 
2228       /* local sweep */
2229       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
2230     }
2231   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2232     if (flag & SOR_ZERO_INITIAL_GUESS) {
2233       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2234       its--;
2235     }
2236     while (its--) {
2237       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2238       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2239 
2240       /* update rhs: bb1 = bb - B*x */
2241       PetscCall(VecScale(mat->lvec, -1.0));
2242       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2243 
2244       /* local sweep */
2245       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
2246     }
2247   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Requested parallel version of SOR not supported");
2248 
2249   PetscCall(VecDestroy(&bb1));
2250   PetscFunctionReturn(PETSC_SUCCESS);
2251 }
2252 
2253 PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions)
2254 {
2255   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data;
2256   PetscInt     m, N, i, *garray = aij->garray;
2257   PetscInt     ib, jb, bs = A->rmap->bs;
2258   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data;
2259   MatScalar   *a_val = a_aij->a;
2260   Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data;
2261   MatScalar   *b_val = b_aij->a;
2262   PetscReal   *work;
2263 
2264   PetscFunctionBegin;
2265   PetscCall(MatGetSize(A, &m, &N));
2266   PetscCall(PetscCalloc1(N, &work));
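  /* The bs x bs blocks are stored column-major, so as a_val/b_val walk the stored values the
     outer jb loop runs over block columns and the inner ib loop over block rows; work[]
     accumulates one reduction value per global point column. */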
2267   if (type == NORM_2) {
2268     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2269       for (jb = 0; jb < bs; jb++) {
2270         for (ib = 0; ib < bs; ib++) {
2271           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2272           a_val++;
2273         }
2274       }
2275     }
2276     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2277       for (jb = 0; jb < bs; jb++) {
2278         for (ib = 0; ib < bs; ib++) {
2279           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2280           b_val++;
2281         }
2282       }
2283     }
2284   } else if (type == NORM_1) {
2285     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2286       for (jb = 0; jb < bs; jb++) {
2287         for (ib = 0; ib < bs; ib++) {
2288           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2289           a_val++;
2290         }
2291       }
2292     }
2293     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2294       for (jb = 0; jb < bs; jb++) {
2295         for (ib = 0; ib < bs; ib++) {
2296           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2297           b_val++;
2298         }
2299       }
2300     }
2301   } else if (type == NORM_INFINITY) {
2302     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2303       for (jb = 0; jb < bs; jb++) {
2304         for (ib = 0; ib < bs; ib++) {
2305           PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2306           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2307           a_val++;
2308         }
2309       }
2310     }
2311     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2312       for (jb = 0; jb < bs; jb++) {
2313         for (ib = 0; ib < bs; ib++) {
2314           PetscInt col = garray[b_aij->j[i]] * bs + jb;
2315           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2316           b_val++;
2317         }
2318       }
2319     }
2320   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
2321     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2322       for (jb = 0; jb < bs; jb++) {
2323         for (ib = 0; ib < bs; ib++) {
2324           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
2325           a_val++;
2326         }
2327       }
2328     }
2329     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2330       for (jb = 0; jb < bs; jb++) {
2331         for (ib = 0; ib < bs; ib++) {
2332           work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
2333           b_val++;
2334         }
2335       }
2336     }
2337   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2338     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2339       for (jb = 0; jb < bs; jb++) {
2340         for (ib = 0; ib < bs; ib++) {
2341           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
2342           a_val++;
2343         }
2344       }
2345     }
2346     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2347       for (jb = 0; jb < bs; jb++) {
2348         for (ib = 0; ib < bs; ib++) {
2349           work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
2350           b_val++;
2351         }
2352       }
2353     }
2354   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
2355   if (type == NORM_INFINITY) {
2356     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
2357   } else {
2358     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
2359   }
2360   PetscCall(PetscFree(work));
2361   if (type == NORM_2) {
2362     for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2363   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2364     for (i = 0; i < N; i++) reductions[i] /= m;
2365   }
2366   PetscFunctionReturn(PETSC_SUCCESS);
2367 }
2368 
2369 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values)
2370 {
2371   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2372 
2373   PetscFunctionBegin;
2374   PetscCall(MatInvertBlockDiagonal(a->A, values));
2375   A->factorerrortype             = a->A->factorerrortype;
2376   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2377   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2378   PetscFunctionReturn(PETSC_SUCCESS);
2379 }
2380 
2381 PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a)
2382 {
2383   Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
2384   Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ *)maij->A->data;
2385 
2386   PetscFunctionBegin;
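  /* Ensure the diagonal entries can be stored before shifting: if Y is not preallocated,
     preallocate one block per block row; if it is preallocated but holds no nonzeros, redo
     the diagonal-block preallocation (preserving nonew) so MatShift_Basic() can insert. */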
2387   if (!Y->preallocated) {
2388     PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
2389   } else if (!aij->nz) {
2390     PetscInt nonew = aij->nonew;
2391     PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
2392     aij->nonew = nonew;
2393   }
2394   PetscCall(MatShift_Basic(Y, a));
2395   PetscFunctionReturn(PETSC_SUCCESS);
2396 }
2397 
2398 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d)
2399 {
2400   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2401 
2402   PetscFunctionBegin;
2403   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2404   PetscCall(MatMissingDiagonal(a->A, missing, d));
2405   if (d) {
2406     PetscInt rstart;
2407     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2408     *d += rstart / A->rmap->bs;
2409   }
2410   PetscFunctionReturn(PETSC_SUCCESS);
2411 }
2412 
2413 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a)
2414 {
2415   PetscFunctionBegin;
2416   *a = ((Mat_MPIBAIJ *)A->data)->A;
2417   PetscFunctionReturn(PETSC_SUCCESS);
2418 }
2419 
2420 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep)
2421 {
2422   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2423 
2424   PetscFunctionBegin;
2425   PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2426   PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2427   PetscFunctionReturn(PETSC_SUCCESS);
2428 }
2429 
2430 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2431                                        MatGetRow_MPIBAIJ,
2432                                        MatRestoreRow_MPIBAIJ,
2433                                        MatMult_MPIBAIJ,
2434                                        /* 4*/ MatMultAdd_MPIBAIJ,
2435                                        MatMultTranspose_MPIBAIJ,
2436                                        MatMultTransposeAdd_MPIBAIJ,
2437                                        NULL,
2438                                        NULL,
2439                                        NULL,
2440                                        /*10*/ NULL,
2441                                        NULL,
2442                                        NULL,
2443                                        MatSOR_MPIBAIJ,
2444                                        MatTranspose_MPIBAIJ,
2445                                        /*15*/ MatGetInfo_MPIBAIJ,
2446                                        MatEqual_MPIBAIJ,
2447                                        MatGetDiagonal_MPIBAIJ,
2448                                        MatDiagonalScale_MPIBAIJ,
2449                                        MatNorm_MPIBAIJ,
2450                                        /*20*/ MatAssemblyBegin_MPIBAIJ,
2451                                        MatAssemblyEnd_MPIBAIJ,
2452                                        MatSetOption_MPIBAIJ,
2453                                        MatZeroEntries_MPIBAIJ,
2454                                        /*24*/ MatZeroRows_MPIBAIJ,
2455                                        NULL,
2456                                        NULL,
2457                                        NULL,
2458                                        NULL,
2459                                        /*29*/ MatSetUp_MPI_Hash,
2460                                        NULL,
2461                                        NULL,
2462                                        MatGetDiagonalBlock_MPIBAIJ,
2463                                        NULL,
2464                                        /*34*/ MatDuplicate_MPIBAIJ,
2465                                        NULL,
2466                                        NULL,
2467                                        NULL,
2468                                        NULL,
2469                                        /*39*/ MatAXPY_MPIBAIJ,
2470                                        MatCreateSubMatrices_MPIBAIJ,
2471                                        MatIncreaseOverlap_MPIBAIJ,
2472                                        MatGetValues_MPIBAIJ,
2473                                        MatCopy_MPIBAIJ,
2474                                        /*44*/ NULL,
2475                                        MatScale_MPIBAIJ,
2476                                        MatShift_MPIBAIJ,
2477                                        NULL,
2478                                        MatZeroRowsColumns_MPIBAIJ,
2479                                        /*49*/ NULL,
2480                                        NULL,
2481                                        NULL,
2482                                        NULL,
2483                                        NULL,
2484                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2485                                        NULL,
2486                                        MatSetUnfactored_MPIBAIJ,
2487                                        MatPermute_MPIBAIJ,
2488                                        MatSetValuesBlocked_MPIBAIJ,
2489                                        /*59*/ MatCreateSubMatrix_MPIBAIJ,
2490                                        MatDestroy_MPIBAIJ,
2491                                        MatView_MPIBAIJ,
2492                                        NULL,
2493                                        NULL,
2494                                        /*64*/ NULL,
2495                                        NULL,
2496                                        NULL,
2497                                        NULL,
2498                                        NULL,
2499                                        /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2500                                        NULL,
2501                                        NULL,
2502                                        NULL,
2503                                        NULL,
2504                                        /*74*/ NULL,
2505                                        MatFDColoringApply_BAIJ,
2506                                        NULL,
2507                                        NULL,
2508                                        NULL,
2509                                        /*79*/ NULL,
2510                                        NULL,
2511                                        NULL,
2512                                        NULL,
2513                                        MatLoad_MPIBAIJ,
2514                                        /*84*/ NULL,
2515                                        NULL,
2516                                        NULL,
2517                                        NULL,
2518                                        NULL,
2519                                        /*89*/ NULL,
2520                                        NULL,
2521                                        NULL,
2522                                        NULL,
2523                                        NULL,
2524                                        /*94*/ NULL,
2525                                        NULL,
2526                                        NULL,
2527                                        NULL,
2528                                        NULL,
2529                                        /*99*/ NULL,
2530                                        NULL,
2531                                        NULL,
2532                                        MatConjugate_MPIBAIJ,
2533                                        NULL,
2534                                        /*104*/ NULL,
2535                                        MatRealPart_MPIBAIJ,
2536                                        MatImaginaryPart_MPIBAIJ,
2537                                        NULL,
2538                                        NULL,
2539                                        /*109*/ NULL,
2540                                        NULL,
2541                                        NULL,
2542                                        NULL,
2543                                        MatMissingDiagonal_MPIBAIJ,
2544                                        /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ,
2545                                        NULL,
2546                                        MatGetGhosts_MPIBAIJ,
2547                                        NULL,
2548                                        NULL,
2549                                        /*119*/ NULL,
2550                                        NULL,
2551                                        NULL,
2552                                        NULL,
2553                                        MatGetMultiProcBlock_MPIBAIJ,
2554                                        /*124*/ NULL,
2555                                        MatGetColumnReductions_MPIBAIJ,
2556                                        MatInvertBlockDiagonal_MPIBAIJ,
2557                                        NULL,
2558                                        NULL,
2559                                        /*129*/ NULL,
2560                                        NULL,
2561                                        NULL,
2562                                        NULL,
2563                                        NULL,
2564                                        /*134*/ NULL,
2565                                        NULL,
2566                                        NULL,
2567                                        NULL,
2568                                        NULL,
2569                                        /*139*/ MatSetBlockSizes_Default,
2570                                        NULL,
2571                                        NULL,
2572                                        MatFDColoringSetUp_MPIXAIJ,
2573                                        NULL,
2574                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
2575                                        NULL,
2576                                        NULL,
2577                                        NULL,
2578                                        NULL,
2579                                        NULL,
2580                                        /*150*/ NULL,
2581                                        MatEliminateZeros_MPIBAIJ};
2582 
2583 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
2584 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
2585 
2586 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
2587 {
2588   PetscInt        m, rstart, cstart, cend;
2589   PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
2590   const PetscInt *JJ          = NULL;
2591   PetscScalar    *values      = NULL;
2592   PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
2593   PetscBool       nooffprocentries;
2594 
2595   PetscFunctionBegin;
2596   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
2597   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
2598   PetscCall(PetscLayoutSetUp(B->rmap));
2599   PetscCall(PetscLayoutSetUp(B->cmap));
2600   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2601   m      = B->rmap->n / bs;
2602   rstart = B->rmap->rstart / bs;
2603   cstart = B->cmap->rstart / bs;
2604   cend   = B->cmap->rend / bs;
2605 
2606   PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
2607   PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
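  /* For each local block row, count how many block columns fall inside [cstart, cend) (the
     diagonal block) versus outside it (the off-diagonal block) to drive exact preallocation. */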
2608   for (i = 0; i < m; i++) {
2609     nz = ii[i + 1] - ii[i];
2610     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
2611     nz_max = PetscMax(nz_max, nz);
2612     dlen   = 0;
2613     olen   = 0;
2614     JJ     = jj + ii[i];
2615     for (j = 0; j < nz; j++) {
2616       if (*JJ < cstart || *JJ >= cend) olen++;
2617       else dlen++;
2618       JJ++;
2619     }
2620     d_nnz[i] = dlen;
2621     o_nnz[i] = olen;
2622   }
2623   PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
2624   PetscCall(PetscFree2(d_nnz, o_nnz));
2625 
2626   values = (PetscScalar *)V;
2627   if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
2628   for (i = 0; i < m; i++) {
2629     PetscInt        row   = i + rstart;
2630     PetscInt        ncols = ii[i + 1] - ii[i];
2631     const PetscInt *icols = jj + ii[i];
2632     if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2633       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
2634       PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
2635     } else { /* block ordering does not match so we can only insert one block at a time. */
2636       PetscInt j;
2637       for (j = 0; j < ncols; j++) {
2638         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
2639         PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
2640       }
2641     }
2642   }
2643 
2644   if (!V) PetscCall(PetscFree(values));
2645   nooffprocentries    = B->nooffprocentries;
2646   B->nooffprocentries = PETSC_TRUE;
2647   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2648   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2649   B->nooffprocentries = nooffprocentries;
2650 
2651   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2652   PetscFunctionReturn(PETSC_SUCCESS);
2653 }
2654 
2655 /*@C
2656   MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values
2657 
2658   Collective
2659 
2660   Input Parameters:
2661 + B  - the matrix
2662 . bs - the block size
2663 . i  - the indices into `j` for the start of each local row (starts with zero)
2664 . j  - the column indices for each local row (starts with zero); these must be sorted within each row
2665 - v  - optional values in the matrix
2666 
2667   Level: advanced
2668 
2669   Notes:
2670   The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
2671   may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs], where the second index is
2672   over rows within a block and the last index is over columns within a block.  Fortran programs will likely set
2673   `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz), in which the first index is over rows within a
2674   block and the second index is over columns within a block.
2675 
2676   Though this routine has Preallocation() in the name, it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well.
2677 
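  Example Usage:
  A minimal sketch; the sizes, indices, and values below are purely illustrative (shown for a single process that owns
  two block rows of size bs = 2, with blocks at block positions (0,0), (0,1), and (1,1))

.vb
  Mat         B;
  PetscInt    bs  = 2;
  PetscInt    i[] = {0, 2, 3};    /* block row 0 has two blocks, block row 1 has one block */
  PetscInt    j[] = {0, 1, 1};    /* sorted global block column indices for each block row */
  PetscScalar v[3 * 2 * 2] = {0}; /* bs*bs values per block; may also be passed as NULL */

  PetscCall(MatCreate(PETSC_COMM_WORLD, &B));
  PetscCall(MatSetSizes(B, 4, 4, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(B, MATMPIBAIJ));
  PetscCall(MatMPIBAIJSetPreallocationCSR(B, bs, i, j, v));
.ve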
2678 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MPIBAIJ`
2679 @*/
2680 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2681 {
2682   PetscFunctionBegin;
2683   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2684   PetscValidType(B, 1);
2685   PetscValidLogicalCollectiveInt(B, bs, 2);
2686   PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
2687   PetscFunctionReturn(PETSC_SUCCESS);
2688 }
2689 
2690 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
2691 {
2692   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
2693   PetscInt     i;
2694   PetscMPIInt  size;
2695 
2696   PetscFunctionBegin;
2697   if (B->hash_active) {
2698     B->ops[0]      = b->cops;
2699     B->hash_active = PETSC_FALSE;
2700   }
2701   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
2702   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
2703   PetscCall(PetscLayoutSetUp(B->rmap));
2704   PetscCall(PetscLayoutSetUp(B->cmap));
2705   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2706 
2707   if (d_nnz) {
2708     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
2709   }
2710   if (o_nnz) {
2711     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
2712   }
2713 
2714   b->bs2 = bs * bs;
2715   b->mbs = B->rmap->n / bs;
2716   b->nbs = B->cmap->n / bs;
2717   b->Mbs = B->rmap->N / bs;
2718   b->Nbs = B->cmap->N / bs;
2719 
2720   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
2721   b->rstartbs = B->rmap->rstart / bs;
2722   b->rendbs   = B->rmap->rend / bs;
2723   b->cstartbs = B->cmap->rstart / bs;
2724   b->cendbs   = B->cmap->rend / bs;
2725 
2726 #if defined(PETSC_USE_CTABLE)
2727   PetscCall(PetscHMapIDestroy(&b->colmap));
2728 #else
2729   PetscCall(PetscFree(b->colmap));
2730 #endif
2731   PetscCall(PetscFree(b->garray));
2732   PetscCall(VecDestroy(&b->lvec));
2733   PetscCall(VecScatterDestroy(&b->Mvctx));
2734 
2735   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2736   PetscCall(MatDestroy(&b->B));
2737   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2738   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2739   PetscCall(MatSetType(b->B, MATSEQBAIJ));
2740 
2741   PetscCall(MatDestroy(&b->A));
2742   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2743   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2744   PetscCall(MatSetType(b->A, MATSEQBAIJ));
2745 
2746   PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
2747   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
2748   B->preallocated  = PETSC_TRUE;
2749   B->was_assembled = PETSC_FALSE;
2750   B->assembled     = PETSC_FALSE;
2751   PetscFunctionReturn(PETSC_SUCCESS);
2752 }
2753 
2754 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec);
2755 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal);
2756 
2757 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj)
2758 {
2759   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
2760   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data;
2761   PetscInt        M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs;
2762   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2763 
2764   PetscFunctionBegin;
2765   PetscCall(PetscMalloc1(M + 1, &ii));
2766   ii[0] = 0;
2767   for (i = 0; i < M; i++) {
2768     PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]);
2769     PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]);
2770     ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i];
2771     /* remove one from the count if the matrix row contains its diagonal entry */
2772     for (j = id[i]; j < id[i + 1]; j++) {
2773       if (jd[j] == i) {
2774         ii[i + 1]--;
2775         break;
2776       }
2777     }
2778   }
2779   PetscCall(PetscMalloc1(ii[M], &jj));
2780   cnt = 0;
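  /* merge the column lists in ascending global order: off-diagonal columns that precede the diagonal block,
     then the diagonal-block columns (skipping the diagonal entry itself), then the remaining off-diagonal columns */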
2781   for (i = 0; i < M; i++) {
2782     for (j = io[i]; j < io[i + 1]; j++) {
2783       if (garray[jo[j]] > rstart) break;
2784       jj[cnt++] = garray[jo[j]];
2785     }
2786     for (k = id[i]; k < id[i + 1]; k++) {
2787       if (jd[k] != i) jj[cnt++] = rstart + jd[k];
2788     }
2789     for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]];
2790   }
2791   PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj));
2792   PetscFunctionReturn(PETSC_SUCCESS);
2793 }
2794 
2795 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2796 
2797 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
2798 
2799 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
2800 {
2801   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2802   Mat_MPIAIJ  *b;
2803   Mat          B;
2804 
2805   PetscFunctionBegin;
2806   PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");
2807 
2808   if (reuse == MAT_REUSE_MATRIX) {
2809     B = *newmat;
2810   } else {
2811     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
2812     PetscCall(MatSetType(B, MATMPIAIJ));
2813     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
2814     PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
2815     PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
2816     PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
2817   }
2818   b = (Mat_MPIAIJ *)B->data;
2819 
2820   if (reuse == MAT_REUSE_MATRIX) {
2821     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
2822     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
2823   } else {
2824     PetscInt   *garray = a->garray;
2825     Mat_SeqAIJ *bB;
2826     PetscInt    bs, nnz;
2827     PetscCall(MatDestroy(&b->A));
2828     PetscCall(MatDestroy(&b->B));
2829     /* just clear out the data structure */
2830     PetscCall(MatDisAssemble_MPIAIJ(B));
2831     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
2832     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));
2833 
2834     /* Global numbering for b->B columns */
2835     bB  = (Mat_SeqAIJ *)b->B->data;
2836     bs  = A->rmap->bs;
2837     nnz = bB->i[A->rmap->n];
2838     for (PetscInt k = 0; k < nnz; k++) {
2839       PetscInt bj = bB->j[k] / bs;
2840       PetscInt br = bB->j[k] % bs;
2841       bB->j[k]    = garray[bj] * bs + br;
2842     }
2843   }
2844   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2845   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2846 
2847   if (reuse == MAT_INPLACE_MATRIX) {
2848     PetscCall(MatHeaderReplace(A, &B));
2849   } else {
2850     *newmat = B;
2851   }
2852   PetscFunctionReturn(PETSC_SUCCESS);
2853 }
2854 
2855 /*MC
2856    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2857 
2858    Options Database Keys:
2859 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
2860 . -mat_block_size <bs> - set the blocksize used to store the matrix
2861 . -mat_baij_mult_version <version> - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
2862 - -mat_use_hash_table <fact> - set hash table factor
2863 
2864    Level: beginner
2865 
2866    Note:
2867     `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. In this case no
2868     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
2869 
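   Example Usage:
   A minimal sketch of selecting this matrix type at run time (the global sizes are illustrative); run with
   -mat_type mpibaij -mat_block_size 2

.vb
   PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 100));
   PetscCall(MatSetFromOptions(A));
.ve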
2870 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ()`
2871 M*/
2872 
2873 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);
2874 
2875 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2876 {
2877   Mat_MPIBAIJ *b;
2878   PetscBool    flg = PETSC_FALSE;
2879 
2880   PetscFunctionBegin;
2881   PetscCall(PetscNew(&b));
2882   B->data      = (void *)b;
2883   B->ops[0]    = MatOps_Values;
2884   B->assembled = PETSC_FALSE;
2885 
2886   B->insertmode = NOT_SET_VALUES;
2887   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
2888   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2889 
2890   /* build local table of row and column ownerships */
2891   PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));
2892 
2893   /* build cache for off-processor entries formed during assembly */
2894   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
2895 
2896   b->donotstash  = PETSC_FALSE;
2897   b->colmap      = NULL;
2898   b->garray      = NULL;
2899   b->roworiented = PETSC_TRUE;
2900 
2901   /* stuff used in block assembly */
2902   b->barray = NULL;
2903 
2904   /* stuff used for matrix vector multiply */
2905   b->lvec  = NULL;
2906   b->Mvctx = NULL;
2907 
2908   /* stuff for MatGetRow() */
2909   b->rowindices   = NULL;
2910   b->rowvalues    = NULL;
2911   b->getrowactive = PETSC_FALSE;
2912 
2913   /* hash table stuff */
2914   b->ht           = NULL;
2915   b->hd           = NULL;
2916   b->ht_size      = 0;
2917   b->ht_flag      = PETSC_FALSE;
2918   b->ht_fact      = 0;
2919   b->ht_total_ct  = 0;
2920   b->ht_insert_ct = 0;
2921 
2922   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2923   b->ijonly = PETSC_FALSE;
2924 
2925   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
2926   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
2927   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
2928 #if defined(PETSC_HAVE_HYPRE)
2929   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
2930 #endif
2931   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
2932   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
2933   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
2934   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
2935   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
2936   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
2937   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
2938   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));
2939 
2940   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
2941   PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
2942   if (flg) {
2943     PetscReal fact = 1.39;
2944     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
2945     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
2946     if (fact <= 1.0) fact = 1.39;
2947     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
2948     PetscCall(PetscInfo(B, "Hash table factor used %5.2g\n", (double)fact));
2949   }
2950   PetscOptionsEnd();
2951   PetscFunctionReturn(PETSC_SUCCESS);
2952 }
2953 
2954 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2955 /*MC
2956    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2957 
2958    This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
2959    and `MATMPIBAIJ` otherwise.
2960 
2961    Options Database Keys:
2962 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`
2963 
2964   Level: beginner
2965 
2966 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
2967 M*/
2968 
2969 /*@C
2970   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
2971   (block compressed row).
2972 
2973   Collective
2974 
2975   Input Parameters:
2976 + B     - the matrix
2977 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2978           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2979 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2980            submatrix  (same for all local rows)
2981 . d_nnz - array containing the number of block nonzeros in the various block rows
2982            in the diagonal portion of the local submatrix (possibly different for each block
2983            row) or `NULL`.  If you plan to factor the matrix you must leave room for the diagonal entry and
2984            set it even if it is zero.
2985 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of the local
2986            submatrix (same for all local rows).
2987 - o_nnz - array containing the number of block nonzeros in the various block rows of the
2988            off-diagonal portion of the local submatrix (possibly different for
2989            each block row) or `NULL`.
2990 
2991    If the *_nnz parameter is given then the *_nz parameter is ignored
2992 
2993   Options Database Keys:
2994 + -mat_block_size            - size of the blocks to use
2995 - -mat_use_hash_table <fact> - set hash table factor
2996 
2997   Level: intermediate
2998 
2999   Notes:
3000   For good matrix assembly performance
3001   the user should preallocate the matrix storage by setting the parameters
3002   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3003   performance can be increased by more than a factor of 50.
3004 
3005   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
3006   then it must be used on all processors that share the object for that argument.
3007 
3008   Storage Information:
3009   For a square global matrix we define each processor's diagonal portion
3010   to be its local rows and the corresponding columns (a square submatrix);
3011   each processor's off-diagonal portion encompasses the remainder of the
3012   local matrix (a rectangular submatrix).
3013 
3014   The user can specify preallocated storage for the diagonal part of
3015   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
3016   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3017   memory allocation.  Likewise, specify preallocated storage for the
3018   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3019 
3020   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3021   the figure below we depict these three local rows and all columns (0-11).
3022 
3023 .vb
3024            0 1 2 3 4 5 6 7 8 9 10 11
3025           --------------------------
3026    row 3  |o o o d d d o o o o  o  o
3027    row 4  |o o o d d d o o o o  o  o
3028    row 5  |o o o d d d o o o o  o  o
3029           --------------------------
3030 .ve
3031 
3032   Thus, any entries in the d locations are stored in the d (diagonal)
3033   submatrix, and any entries in the o locations are stored in the
3034   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3035   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3036 
3037   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3038   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3039   In general, for PDE problems in which most nonzeros are near the diagonal,
3040   one expects `d_nz` >> `o_nz`.
3041 
3042   You can call `MatGetInfo()` to get information on how effective the preallocation was;
3043   for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3044   You can also run with the option `-info` and look for messages with the string
3045   malloc in them to see if additional memory allocation was needed.
3046 
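  Example Usage:
  A minimal sketch, assuming each process owns m rows (m divisible by bs); the counts 3 and 1 are illustrative

.vb
  PetscCall(MatCreate(comm, &B));
  PetscCall(MatSetSizes(B, m, m, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(B, MATMPIBAIJ));
  /* either constant block-nonzero counts per block row ... */
  PetscCall(MatMPIBAIJSetPreallocation(B, bs, 3, NULL, 1, NULL));
  /* ... or per-block-row counts in arrays d_nnz and o_nnz of length m/bs,
     in which case the constant counts are ignored */
.ve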
3047 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
3048 @*/
3049 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
3050 {
3051   PetscFunctionBegin;
3052   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3053   PetscValidType(B, 1);
3054   PetscValidLogicalCollectiveInt(B, bs, 2);
3055   PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
3056   PetscFunctionReturn(PETSC_SUCCESS);
3057 }
3058 
3059 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
3060 /*@C
3061   MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
3062   (block compressed row).
3063 
3064   Collective
3065 
3066   Input Parameters:
3067 + comm  - MPI communicator
3068 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3069           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3070 . m     - number of local rows (or `PETSC_DECIDE` to have it calculated if M is given)
3071            This value should be the same as the local size used in creating the
3072            y vector for the matrix-vector product y = Ax.
3073 . n     - number of local columns (or `PETSC_DECIDE` to have it calculated if N is given)
3074            This value should be the same as the local size used in creating the
3075            x vector for the matrix-vector product y = Ax.
3076 . M     - number of global rows (or `PETSC_DETERMINE` to have it calculated if m is given)
3077 . N     - number of global columns (or `PETSC_DETERMINE` to have it calculated if n is given)
3078 . d_nz  - number of nonzero blocks per block row in diagonal portion of local
3079            submatrix  (same for all local rows)
3080 . d_nnz - array containing the number of nonzero blocks in the various block rows
3081            in the diagonal portion of the local submatrix (possibly different for each block
3082            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3083            and set it even if it is zero.
3084 . o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3085            submatrix (same for all local rows).
3086 - o_nnz - array containing the number of nonzero blocks in the various block rows of the
3087            off-diagonal portion of the local submatrix (possibly different for
3088            each block row) or NULL.
3089 
3090   Output Parameter:
3091 . A - the matrix
3092 
3093   Options Database Keys:
3094 + -mat_block_size            - size of the blocks to use
3095 - -mat_use_hash_table <fact> - set hash table factor
3096 
3097   Level: intermediate
3098 
3099   Notes:
3100   For good matrix assembly performance
3101   the user should preallocate the matrix storage by setting the parameters
3102   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3103   performance can be increased by more than a factor of 50.
3104 
3105   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3106   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3107   [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
3108 
3109   If the *_nnz parameter is given then the *_nz parameter is ignored
3110 
3111   A nonzero block is any block that has 1 or more nonzeros in it
3112 
3113   The user MUST specify either the local or global matrix dimensions
3114   (possibly both).
3115 
3116   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
3117   then it must be used on all processors that share the object for that argument.
3118 
3119   Storage Information:
3120   For a square global matrix we define each processor's diagonal portion
3121   to be its local rows and the corresponding columns (a square submatrix);
3122   each processor's off-diagonal portion encompasses the remainder of the
3123   local matrix (a rectangular submatrix).
3124 
3125   The user can specify preallocated storage for the diagonal part of
3126   the local submatrix with either d_nz or d_nnz (not both).  Set
3127   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3128   memory allocation.  Likewise, specify preallocated storage for the
3129   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3130 
3131   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3132   the figure below we depict these three local rows and all columns (0-11).
3133 
3134 .vb
3135            0 1 2 3 4 5 6 7 8 9 10 11
3136           --------------------------
3137    row 3  |o o o d d d o o o o  o  o
3138    row 4  |o o o d d d o o o o  o  o
3139    row 5  |o o o d d d o o o o  o  o
3140           --------------------------
3141 .ve
3142 
3143   Thus, any entries in the d locations are stored in the d (diagonal)
3144   submatrix, and any entries in the o locations are stored in the
3145   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3146   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3147 
3148   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3149   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3150   In general, for PDE problems in which most nonzeros are near the diagonal,
3151   one expects `d_nz` >> `o_nz`.
3152 
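  Example Usage:
  A minimal sketch (the global size 100 and the counts 5 and 2 are illustrative): a matrix with 2x2 blocks,
  reserving 5 diagonal-portion and 2 off-diagonal-portion block nonzeros per block row

.vb
  Mat A;
  PetscCall(MatCreateBAIJ(PETSC_COMM_WORLD, 2, PETSC_DECIDE, PETSC_DECIDE, 100, 100, 5, NULL, 2, NULL, &A));
  /* ... MatSetValuesBlocked(), MatAssemblyBegin()/MatAssemblyEnd() ... */
  PetscCall(MatDestroy(&A));
.ve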
3153 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
3154 @*/
3155 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
3156 {
3157   PetscMPIInt size;
3158 
3159   PetscFunctionBegin;
3160   PetscCall(MatCreate(comm, A));
3161   PetscCall(MatSetSizes(*A, m, n, M, N));
3162   PetscCallMPI(MPI_Comm_size(comm, &size));
3163   if (size > 1) {
3164     PetscCall(MatSetType(*A, MATMPIBAIJ));
3165     PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
3166   } else {
3167     PetscCall(MatSetType(*A, MATSEQBAIJ));
3168     PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
3169   }
3170   PetscFunctionReturn(PETSC_SUCCESS);
3171 }
3172 
3173 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
3174 {
3175   Mat          mat;
3176   Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
3177   PetscInt     len = 0;
3178 
3179   PetscFunctionBegin;
3180   *newmat = NULL;
3181   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
3182   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
3183   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
3184 
3185   mat->factortype   = matin->factortype;
3186   mat->preallocated = PETSC_TRUE;
3187   mat->assembled    = PETSC_TRUE;
3188   mat->insertmode   = NOT_SET_VALUES;
3189 
3190   a             = (Mat_MPIBAIJ *)mat->data;
3191   mat->rmap->bs = matin->rmap->bs;
3192   a->bs2        = oldmat->bs2;
3193   a->mbs        = oldmat->mbs;
3194   a->nbs        = oldmat->nbs;
3195   a->Mbs        = oldmat->Mbs;
3196   a->Nbs        = oldmat->Nbs;
3197 
3198   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
3199   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
3200 
3201   a->size         = oldmat->size;
3202   a->rank         = oldmat->rank;
3203   a->donotstash   = oldmat->donotstash;
3204   a->roworiented  = oldmat->roworiented;
3205   a->rowindices   = NULL;
3206   a->rowvalues    = NULL;
3207   a->getrowactive = PETSC_FALSE;
3208   a->barray       = NULL;
3209   a->rstartbs     = oldmat->rstartbs;
3210   a->rendbs       = oldmat->rendbs;
3211   a->cstartbs     = oldmat->cstartbs;
3212   a->cendbs       = oldmat->cendbs;
3213 
3214   /* hash table stuff */
3215   a->ht           = NULL;
3216   a->hd           = NULL;
3217   a->ht_size      = 0;
3218   a->ht_flag      = oldmat->ht_flag;
3219   a->ht_fact      = oldmat->ht_fact;
3220   a->ht_total_ct  = 0;
3221   a->ht_insert_ct = 0;
3222 
3223   PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
3224   if (oldmat->colmap) {
3225 #if defined(PETSC_USE_CTABLE)
3226     PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3227 #else
3228     PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
3229     PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
3230 #endif
3231   } else a->colmap = NULL;
3232 
3233   if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
3234     PetscCall(PetscMalloc1(len, &a->garray));
3235     PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3236   } else a->garray = NULL;
3237 
3238   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
3239   PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3240   PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
3241 
3242   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3243   PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3244   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3245   *newmat = mat;
3246   PetscFunctionReturn(PETSC_SUCCESS);
3247 }
3248 
3249 /* Used for both MPIBAIJ and MPISBAIJ matrices */
3250 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
3251 {
3252   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3253   PetscInt    *rowidxs, *colidxs, rs, cs, ce;
3254   PetscScalar *matvals;
3255 
3256   PetscFunctionBegin;
3257   PetscCall(PetscViewerSetUp(viewer));
3258 
3259   /* read in matrix header */
3260   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3261   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3262   M  = header[1];
3263   N  = header[2];
3264   nz = header[3];
3265   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3266   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3267   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");
3268 
3269   /* set block sizes from the viewer's .info file */
3270   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3271   /* set local sizes if not set already */
3272   if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
3273   if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
3274   /* set global sizes if not set already */
3275   if (mat->rmap->N < 0) mat->rmap->N = M;
3276   if (mat->cmap->N < 0) mat->cmap->N = N;
3277   PetscCall(PetscLayoutSetUp(mat->rmap));
3278   PetscCall(PetscLayoutSetUp(mat->cmap));
3279 
3280   /* check if the matrix sizes are correct */
3281   PetscCall(MatGetSize(mat, &rows, &cols));
3282   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3283   PetscCall(MatGetBlockSize(mat, &bs));
3284   PetscCall(MatGetLocalSize(mat, &m, &n));
3285   PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
3286   PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
3287   mbs = m / bs;
3288   nbs = n / bs;
3289 
3290   /* read in row lengths and build row indices */
3291   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3292   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3293   rowidxs[0] = 0;
3294   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3295   PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3296   PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3297 
3298   /* read in column indices and matrix values */
3299   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3300   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3301   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3302 
3303   {                /* preallocate matrix storage */
3304     PetscBT    bt; /* helper bit set to count diagonal nonzeros */
3305     PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
3306     PetscBool  sbaij, done;
3307     PetscInt  *d_nnz, *o_nnz;
3308 
3309     PetscCall(PetscBTCreate(nbs, &bt));
3310     PetscCall(PetscHSetICreate(&ht));
3311     PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
3312     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
3313     for (i = 0; i < mbs; i++) {
3314       PetscCall(PetscBTMemzero(nbs, bt));
3315       PetscCall(PetscHSetIClear(ht));
3316       for (k = 0; k < bs; k++) {
3317         PetscInt row = bs * i + k;
3318         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3319           PetscInt col = colidxs[j];
3320           if (!sbaij || col >= row) {
3321             if (col >= cs && col < ce) {
3322               if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
3323             } else {
3324               PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
3325               if (done) o_nnz[i]++;
3326             }
3327           }
3328         }
3329       }
3330     }
3331     PetscCall(PetscBTDestroy(&bt));
3332     PetscCall(PetscHSetIDestroy(&ht));
3333     PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
3334     PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
3335     PetscCall(PetscFree2(d_nnz, o_nnz));
3336   }
3337 
3338   /* store matrix values */
3339   for (i = 0; i < m; i++) {
3340     PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
3341     PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES));
3342   }
3343 
3344   PetscCall(PetscFree(rowidxs));
3345   PetscCall(PetscFree2(colidxs, matvals));
3346   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3347   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3348   PetscFunctionReturn(PETSC_SUCCESS);
3349 }
3350 
3351 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
3352 {
3353   PetscBool isbinary;
3354 
3355   PetscFunctionBegin;
3356   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3357   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3358   PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
3359   PetscFunctionReturn(PETSC_SUCCESS);
3360 }
3361 
3362 /*@
3363   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrix's hash table
3364 
3365   Input Parameters:
3366 + mat  - the matrix
3367 - fact - factor
3368 
3369   Options Database Key:
3370 . -mat_use_hash_table <fact> - provide the factor
3371 
3372   Level: advanced
3373 
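  Example Usage:
  A minimal sketch; the factor 1.6 is illustrative

.vb
  PetscCall(MatSetOption(mat, MAT_USE_HASH_TABLE, PETSC_TRUE));
  PetscCall(MatMPIBAIJSetHashTableFactor(mat, 1.6));
.ve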
3374 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()`
3375 @*/
3376 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact)
3377 {
3378   PetscFunctionBegin;
3379   PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
3380   PetscFunctionReturn(PETSC_SUCCESS);
3381 }
3382 
3383 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact)
3384 {
3385   Mat_MPIBAIJ *baij;
3386 
3387   PetscFunctionBegin;
3388   baij          = (Mat_MPIBAIJ *)mat->data;
3389   baij->ht_fact = fact;
3390   PetscFunctionReturn(PETSC_SUCCESS);
3391 }
3392 
3393 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
3394 {
3395   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3396   PetscBool    flg;
3397 
3398   PetscFunctionBegin;
3399   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
3400   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
3401   if (Ad) *Ad = a->A;
3402   if (Ao) *Ao = a->B;
3403   if (colmap) *colmap = a->garray;
3404   PetscFunctionReturn(PETSC_SUCCESS);
3405 }
3406 
3407 /*
3408     Special version for direct calls from Fortran (to eliminate two function call overheads)
3409 */
3410 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3411   #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3412 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3413   #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3414 #endif
3415 
3416 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name
3417 /*@C
3418   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`
3419 
3420   Collective
3421 
3422   Input Parameters:
3423 + matin  - the matrix
3424 . min    - number of input rows
3425 . im     - input rows
3426 . nin    - number of input columns
3427 . in     - input columns
3428 . v      - numerical values input
3429 - addvin - `INSERT_VALUES` or `ADD_VALUES`
3430 
3431   Level: advanced
3432 
3433   Developer Notes:
3434   This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.
3435 
3436 .seealso: `Mat`, `MatSetValuesBlocked()`
3437 @*/
3438 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
3439 {
3440   /* convert input arguments to C version */
3441   Mat        mat = *matin;
3442   PetscInt   m = *min, n = *nin;
3443   InsertMode addv = *addvin;
3444 
3445   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ *)mat->data;
3446   const MatScalar *value;
3447   MatScalar       *barray      = baij->barray;
3448   PetscBool        roworiented = baij->roworiented;
3449   PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
3450   PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
3451   PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
3452 
3453   PetscFunctionBegin;
3454   /* tasks normally handled by MatSetValuesBlocked() */
3455   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3456   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
3457   PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
3458   if (mat->assembled) {
3459     mat->was_assembled = PETSC_TRUE;
3460     mat->assembled     = PETSC_FALSE;
3461   }
3462   PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));
3463 
3464   if (!barray) {
3465     PetscCall(PetscMalloc1(bs2, &barray));
3466     baij->barray = barray;
3467   }
3468 
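  /* stepval = number of scalars to skip in v to advance from the end of one bs-long segment of a block
     to the start of the next segment of the same block (rows if row-oriented, columns otherwise) */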
3469   if (roworiented) stepval = (n - 1) * bs;
3470   else stepval = (m - 1) * bs;
3471 
3472   for (i = 0; i < m; i++) {
3473     if (im[i] < 0) continue;
3474     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
3475     if (im[i] >= rstart && im[i] < rend) {
3476       row = im[i] - rstart;
3477       for (j = 0; j < n; j++) {
3478         /* If NumCol = 1 then a copy is not required */
3479         if ((roworiented) && (n == 1)) {
3480           barray = (MatScalar *)v + i * bs2;
3481         } else if ((!roworiented) && (m == 1)) {
3482           barray = (MatScalar *)v + j * bs2;
3483         } else { /* Here a copy is required */
3484           if (roworiented) {
3485             value = v + i * (stepval + bs) * bs + j * bs;
3486           } else {
3487             value = v + j * (stepval + bs) * bs + i * bs;
3488           }
3489           for (ii = 0; ii < bs; ii++, value += stepval) {
3490             for (jj = 0; jj < bs; jj++) *barray++ = *value++;
3491           }
3492           barray -= bs2;
3493         }
3494 
3495         if (in[j] >= cstart && in[j] < cend) {
3496           col = in[j] - cstart;
3497           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
3498         } else if (in[j] < 0) {
3499           continue;
3500         } else {
3501           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
3502           if (mat->was_assembled) {
3503             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
3504 
3505 #if defined(PETSC_USE_DEBUG)
3506   #if defined(PETSC_USE_CTABLE)
3507             {
3508               PetscInt data;
3509               PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
3510               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
3511             }
3512   #else
3513             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
3514   #endif
3515 #endif
3516 #if defined(PETSC_USE_CTABLE)
3517             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
3518             col = (col - 1) / bs;
3519 #else
3520             col = (baij->colmap[in[j]] - 1) / bs;
3521 #endif
3522             if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
3523               PetscCall(MatDisAssemble_MPIBAIJ(mat));
3524               col = in[j];
3525             }
3526           } else col = in[j];
3527           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
3528         }
3529       }
3530     } else {
3531       if (!baij->donotstash) {
3532         if (roworiented) {
3533           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3534         } else {
3535           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3536         }
3537       }
3538     }
3539   }
3540 
3541   /* task normally handled by MatSetValuesBlocked() */
3542   PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
3543   PetscFunctionReturn(PETSC_SUCCESS);
3544 }
3545 
3546 /*@
3547   MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain the local rows in standard
3548   block CSR format.
3549 
3550   Collective
3551 
3552   Input Parameters:
3553 + comm - MPI communicator
3554 . bs   - the block size, only a block size of 1 is supported
3555 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
3556 . n    - number of local columns (or `PETSC_DECIDE` to have it calculated if N is given); this value should be the
3557        same as the local size used in creating the x vector for the matrix-vector product y = Ax.
3558        For square matrices n is almost always m.
3559 . M    - number of global rows (or `PETSC_DETERMINE` to have it calculated if m is given)
3560 . N    - number of global columns (or `PETSC_DETERMINE` to have it calculated if n is given)
3561 . i    - row indices; that is, i[0] = 0 and i[row] = i[row-1] + the number of blocks in block row row-1 of the matrix
3562 . j    - column indices
3563 - a    - matrix values
3564 
3565   Output Parameter:
3566 . mat - the matrix
3567 
3568   Level: intermediate
3569 
3570   Notes:
3571   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
3572   thus you CANNOT change the matrix entries by changing the values of a[] after you have
3573   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3574 
3575   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3576   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3577   block, followed by the second column of the first block, and so on.  That is, the blocks are contiguous in memory
3578   with column-major ordering within blocks.
3579 
3580   The `i` and `j` indices are 0 based, and the `i` indices are offsets into the local `j` array.
3581 
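  Example Usage:
  A minimal sketch with bs = 1 and two local rows; the indices and values are illustrative, and the global column
  count (2) is given explicitly for a run on a single process

.vb
  PetscInt    i[] = {0, 2, 3};
  PetscInt    j[] = {0, 1, 1};
  PetscScalar a[] = {1.0, 2.0, 3.0};
  Mat         A;

  PetscCall(MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD, 1, 2, PETSC_DECIDE, PETSC_DETERMINE, 2, i, j, a, &A));
.ve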
3582 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
3583           `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
3584 @*/
3585 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
3586 {
3587   PetscFunctionBegin;
3588   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3589   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
3590   PetscCall(MatCreate(comm, mat));
3591   PetscCall(MatSetSizes(*mat, m, n, M, N));
3592   PetscCall(MatSetType(*mat, MATMPIBAIJ));
3593   PetscCall(MatSetBlockSize(*mat, bs));
3594   PetscCall(MatSetUp(*mat));
3595   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
3596   PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
3597   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
3598   PetscFunctionReturn(PETSC_SUCCESS);
3599 }
3600 
3601 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3602 {
3603   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
3604   PetscInt    *indx;
3605   PetscScalar *values;
3606 
3607   PetscFunctionBegin;
3608   PetscCall(MatGetSize(inmat, &m, &N));
3609   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3610     Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data;
3611     PetscInt    *dnz, *onz, mbs, Nbs, nbs;
3612     PetscInt    *bindx, rmax = a->rmax, j;
3613     PetscMPIInt  rank, size;
3614 
3615     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3616     mbs = m / bs;
3617     Nbs = N / cbs;
3618     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
3619     nbs = n / cbs;
3620 
3621     PetscCall(PetscMalloc1(rmax, &bindx));
3622     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
3623 
3624     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3625     PetscCallMPI(MPI_Comm_size(comm, &size));
3626     if (rank == size - 1) {
3627       /* Check sum(nbs) = Nbs */
3628       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
3629     }
3630 
3631     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
3632     for (i = 0; i < mbs; i++) {
3633       PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
3634       nnz = nnz / bs;
3635       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
3636       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
3637       PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL));
3638     }
3639     PetscCall(PetscFree(bindx));
3640 
3641     PetscCall(MatCreate(comm, outmat));
3642     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
3643     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
3644     PetscCall(MatSetType(*outmat, MATBAIJ));
3645     PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz));
3646     PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
3647     MatPreallocateEnd(dnz, onz);
3648     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3649   }
3650 
3651   /* numeric phase */
3652   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3653   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
3654 
3655   for (i = 0; i < m; i++) {
3656     PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3657     Ii = i + rstart;
3658     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
3659     PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3660   }
3661   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
3662   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
3663   PetscFunctionReturn(PETSC_SUCCESS);
3664 }
3665