xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 6a210b70a61472cf8733db59e8a3fa68f6578c01)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I  "petscmat.h"  I*/
2 
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
8 {
9   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
10 
11   PetscFunctionBegin;
12   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
13   PetscCall(MatStashDestroy_Private(&mat->stash));
14   PetscCall(MatStashDestroy_Private(&mat->bstash));
15   PetscCall(MatDestroy(&baij->A));
16   PetscCall(MatDestroy(&baij->B));
17 #if defined(PETSC_USE_CTABLE)
18   PetscCall(PetscHMapIDestroy(&baij->colmap));
19 #else
20   PetscCall(PetscFree(baij->colmap));
21 #endif
22   PetscCall(PetscFree(baij->garray));
23   PetscCall(VecDestroy(&baij->lvec));
24   PetscCall(VecScatterDestroy(&baij->Mvctx));
25   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
26   PetscCall(PetscFree(baij->barray));
27   PetscCall(PetscFree2(baij->hd, baij->ht));
28   PetscCall(PetscFree(baij->rangebs));
29   PetscCall(PetscFree(mat->data));
30 
31   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
32   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
33   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
34   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
35   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
36   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
37   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
38   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
39   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
40   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
41 #if defined(PETSC_HAVE_HYPRE)
42   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
43 #endif
44   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
45   PetscFunctionReturn(PETSC_SUCCESS);
46 }
47 
48 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and  MatAssemblyEnd_MPI_Hash() */
49 #define TYPE BAIJ
50 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
51 #undef TYPE
52 
53 #if defined(PETSC_HAVE_HYPRE)
54 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
55 #endif
56 
57 static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[])
58 {
59   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
60   PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
61   PetscScalar       *va, *vv;
62   Vec                vB, vA;
63   const PetscScalar *vb;
64 
65   PetscFunctionBegin;
66   PetscCall(MatCreateVecs(a->A, NULL, &vA));
67   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
68 
69   PetscCall(VecGetArrayWrite(vA, &va));
70   if (idx) {
71     for (i = 0; i < m; i++) {
72       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
73     }
74   }
75 
76   PetscCall(MatCreateVecs(a->B, NULL, &vB));
77   PetscCall(PetscMalloc1(m, &idxb));
78   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
79 
80   PetscCall(VecGetArrayWrite(v, &vv));
81   PetscCall(VecGetArrayRead(vB, &vb));
82   for (i = 0; i < m; i++) {
83     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
84       vv[i] = vb[i];
85       if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
86     } else {
87       vv[i] = va[i];
88       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
89     }
90   }
91   PetscCall(VecRestoreArrayWrite(vA, &vv));
92   PetscCall(VecRestoreArrayWrite(vA, &va));
93   PetscCall(VecRestoreArrayRead(vB, &vb));
94   PetscCall(PetscFree(idxb));
95   PetscCall(VecDestroy(&vA));
96   PetscCall(VecDestroy(&vB));
97   PetscFunctionReturn(PETSC_SUCCESS);
98 }
99 
100 static PetscErrorCode MatGetRowSumAbs_MPIBAIJ(Mat A, Vec v)
101 {
102   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
103   Vec          vB, vA;
104 
105   PetscFunctionBegin;
106   PetscCall(MatCreateVecs(a->A, NULL, &vA));
107   PetscCall(MatGetRowSumAbs(a->A, vA));
108   PetscCall(MatCreateVecs(a->B, NULL, &vB));
109   PetscCall(MatGetRowSumAbs(a->B, vB));
110   PetscCall(VecAXPY(vA, 1.0, vB));
111   PetscCall(VecDestroy(&vB));
112   PetscCall(VecCopy(vA, v));
113   PetscCall(VecDestroy(&vA));
114   PetscFunctionReturn(PETSC_SUCCESS);
115 }
116 
117 static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
118 {
119   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
120 
121   PetscFunctionBegin;
122   PetscCall(MatStoreValues(aij->A));
123   PetscCall(MatStoreValues(aij->B));
124   PetscFunctionReturn(PETSC_SUCCESS);
125 }
126 
127 static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
128 {
129   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
130 
131   PetscFunctionBegin;
132   PetscCall(MatRetrieveValues(aij->A));
133   PetscCall(MatRetrieveValues(aij->B));
134   PetscFunctionReturn(PETSC_SUCCESS);
135 }
136 
137 /*
138      Local utility routine that creates a mapping from the global column
139    number to the local number in the off-diagonal part of the local
140    storage of the matrix.  This is done in a non scalable way since the
141    length of colmap equals the global matrix length.
142 */
143 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
144 {
145   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
146   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
147   PetscInt     nbs = B->nbs, i, bs = mat->rmap->bs;
148 
149   PetscFunctionBegin;
150 #if defined(PETSC_USE_CTABLE)
151   PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap));
152   for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1));
153 #else
154   PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
155   for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
156 #endif
157   PetscFunctionReturn(PETSC_SUCCESS);
158 }
159 
/*
   MatSetValues_SeqBAIJ_A_Private - Statement macro that inserts or adds ONE scalar
   `value` at local point position (row, col) of the diagonal block A.  It expects the
   caller (MatSetValues_MPIBAIJ) to have the working variables brow, bcol, ridx, cidx,
   rp, ap, bap, rmax, nrow, low, high, t, _i, N, bs, bs2 and the A-side arrays
   aa/ai/aj/aimax/ailen in scope.  (orow, ocol) are the original global indices, used
   only for error messages.  If the block column is absent and a->nonew permits it, a
   new bs x bs zero block is inserted (possibly reallocating via
   MatSeqXAIJReallocateAIJ) before the value is written.
*/
#define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = PetscSafePointerPlusOffset(aj, ai[brow]); \
    ap   = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    /* coarse binary search: narrow to a window of at most 3 block columns, then scan */ \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  } while (0)
199 
/*
   MatSetValues_SeqBAIJ_B_Private - Twin of MatSetValues_SeqBAIJ_A_Private for the
   off-diagonal block B; operates on the B-side arrays ba/bi/bj/bimax/bilen and on
   b->nonew, using the distinct label b_noinsert so both macros can be expanded in the
   same function body.  See the A variant above for the algorithm.
*/
#define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = PetscSafePointerPlusOffset(bj, bi[brow]); \
    ap   = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    /* coarse binary search: narrow to a window of at most 3 block columns, then scan */ \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column  (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  } while (0)
239 
/*
   MatSetValues_MPIBAIJ - Inserts/adds values at global POINT (not block) indices into a
   parallel BAIJ matrix.  Locally owned rows are routed to the diagonal block A or the
   off-diagonal block B via the MatSetValues_SeqBAIJ_{A,B}_Private() macros; off-process
   rows are buffered in mat->stash and communicated during assembly.
*/
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  MatScalar    value;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)(A)->data;
  PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar   *aa = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are silently ignored */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* locally owned row */
      row = im[i] - rstart_orig;
      for (j = 0; j < n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* column in the diagonal block A */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else { /* column in the off-diagonal block B */
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            /* after a first assembly B is stored with compacted local column indices;
               translate the global column through colmap (entries are shifted by one) */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
              /* column not present in B: expand B back to global indexing so the new
                 nonzero can be inserted; assembly will re-compress it */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)(B)->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else {
              PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
              col += in[j] % bs; /* add the point offset inside the block */
            }
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else { /* off-process row: stash it for MatAssemblyBegin/End communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
325 
326 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
327 {
328   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
329   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
330   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
331   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
332   PetscBool          roworiented = a->roworiented;
333   const PetscScalar *value       = v;
334   MatScalar         *ap, *aa = a->a, *bap;
335 
336   PetscFunctionBegin;
337   rp    = aj + ai[row];
338   ap    = aa + bs2 * ai[row];
339   rmax  = imax[row];
340   nrow  = ailen[row];
341   value = v;
342   low   = 0;
343   high  = nrow;
344   while (high - low > 7) {
345     t = (low + high) / 2;
346     if (rp[t] > col) high = t;
347     else low = t;
348   }
349   for (i = low; i < high; i++) {
350     if (rp[i] > col) break;
351     if (rp[i] == col) {
352       bap = ap + bs2 * i;
353       if (roworiented) {
354         if (is == ADD_VALUES) {
355           for (ii = 0; ii < bs; ii++) {
356             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
357           }
358         } else {
359           for (ii = 0; ii < bs; ii++) {
360             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
361           }
362         }
363       } else {
364         if (is == ADD_VALUES) {
365           for (ii = 0; ii < bs; ii++, value += bs) {
366             for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
367             bap += bs;
368           }
369         } else {
370           for (ii = 0; ii < bs; ii++, value += bs) {
371             for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
372             bap += bs;
373           }
374         }
375       }
376       goto noinsert2;
377     }
378   }
379   if (nonew == 1) goto noinsert2;
380   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
381   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
382   N = nrow++ - 1;
383   high++;
384   /* shift up all the later entries in this row */
385   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
386   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
387   rp[i] = col;
388   bap   = ap + bs2 * i;
389   if (roworiented) {
390     for (ii = 0; ii < bs; ii++) {
391       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
392     }
393   } else {
394     for (ii = 0; ii < bs; ii++) {
395       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
396     }
397   }
398 noinsert2:;
399   ailen[row] = nrow;
400   PetscFunctionReturn(PETSC_SUCCESS);
401 }
402 
/*
   MatSetValuesBlocked_MPIBAIJ - Inserts/adds bs x bs blocks of values at global BLOCK
   indices into a parallel BAIJ matrix; local blocks go to A or B through
   MatSetValuesBlocked_SeqBAIJ_Inlined(), off-process rows go to the block stash.

   This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
   by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
*/
static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ *)mat->data;
  const PetscScalar *value;
  MatScalar         *barray      = baij->barray; /* persistent bs2-sized staging buffer */
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    /* lazily allocate the staging buffer on first use; it lives on in baij->barray */
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* distance (in scalars) between consecutive rows/columns of one block inside v[] */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative block rows are silently ignored */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) { /* locally owned block row */
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i * (stepval + bs) + j) * bs;
          } else {
            value = v + (j * (stepval + bs) + i) * bs;
          }
          /* gather the strided bs x bs block into the contiguous staging buffer */
          for (ii = 0; ii < bs; ii++, value += bs + stepval) {
            for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2; /* rewind to the start of the staging buffer */
        }

        if (in[j] >= cstart && in[j] < cend) { /* block column in the diagonal block A */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else { /* block column in the off-diagonal block B */
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            /* translate the global block column through colmap (stored shifted by one,
               in scalar-column units) into B's compacted local block numbering */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = col < 1 ? -1 : (col - 1) / bs;
#else
            col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
              /* column not present: expand B back to global indexing so the new block fits */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else { /* off-process block row: stash it for assembly-time communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
487 
/* Multiplicative hashing: HASH_KEY is the fractional part of the golden ratio;
   HASH maps `key` to a slot in [0, size) via the fractional part of key*HASH_KEY.
   `tmp` must be a PetscReal lvalue supplied by the caller (used as scratch). */
#define HASH_KEY             0.6180339887
#define HASH(size, key, tmp) (tmp = (key) * HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/*
   MatSetValues_MPIBAIJ_HT - Hash-table variant of MatSetValues() for MPIBAIJ, used when
   the nonzero pattern is fixed: each local (block row, block col) pair is located
   through the open-addressed table baij->ht (built during assembly), whose slot gives
   the address of the corresponding bs x bs block in baij->hd.  Off-process rows are
   stashed as in the standard path.  In debug builds probe counts are accumulated into
   ht_total_ct/ht_insert_ct for load-factor diagnostics.
*/
static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
  PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
  PetscReal    tmp;
  MatScalar  **HD       = baij->hd, value;
  PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) { /* locally owned point row */
      for (j = 0; j < n; j++) {
        col = in[j];
        if (roworiented) value = v[i * n + j];
        else value = v[i + j * m];
        /* Look up into the hash table; the key is the global block position (1-based) */
        key = (row / bs) * Nbs + (col / bs) + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          insert_ct++;
          total_ct++;
          /* linear probing from h1, wrapping once around the table; counts probes */
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        /* A HASH table entry is found, so insert the values at the correct address
           (blocks are stored column-major, hence col%bs strides by bs) */
        if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
        else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
      }
    } else if (!baij->donotstash) { /* off-process row: stash for assembly */
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
556 
557 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
558 {
559   Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
560   PetscBool          roworiented = baij->roworiented;
561   PetscInt           i, j, ii, jj, row, col;
562   PetscInt           rstart = baij->rstartbs;
563   PetscInt           rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
564   PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
565   PetscReal          tmp;
566   MatScalar        **HD = baij->hd, *baij_a;
567   const PetscScalar *v_t, *value;
568   PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;
569 
570   PetscFunctionBegin;
571   if (roworiented) stepval = (n - 1) * bs;
572   else stepval = (m - 1) * bs;
573 
574   for (i = 0; i < m; i++) {
575     if (PetscDefined(USE_DEBUG)) {
576       PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
577       PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
578     }
579     row = im[i];
580     v_t = v + i * nbs2;
581     if (row >= rstart && row < rend) {
582       for (j = 0; j < n; j++) {
583         col = in[j];
584 
585         /* Look up into the Hash Table */
586         key = row * Nbs + col + 1;
587         h1  = HASH(size, key, tmp);
588 
589         idx = h1;
590         if (PetscDefined(USE_DEBUG)) {
591           total_ct++;
592           insert_ct++;
593           if (HT[idx] != key) {
594             for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
595             if (idx == size) {
596               for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
597               PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
598             }
599           }
600         } else if (HT[idx] != key) {
601           for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
602           if (idx == size) {
603             for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
604             PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
605           }
606         }
607         baij_a = HD[idx];
608         if (roworiented) {
609           /*value = v + i*(stepval+bs)*bs + j*bs;*/
610           /* value = v + (i*(stepval+bs)+j)*bs; */
611           value = v_t;
612           v_t += bs;
613           if (addv == ADD_VALUES) {
614             for (ii = 0; ii < bs; ii++, value += stepval) {
615               for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
616             }
617           } else {
618             for (ii = 0; ii < bs; ii++, value += stepval) {
619               for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
620             }
621           }
622         } else {
623           value = v + j * (stepval + bs) * bs + i * bs;
624           if (addv == ADD_VALUES) {
625             for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
626               for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
627             }
628           } else {
629             for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
630               for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
631             }
632           }
633         }
634       }
635     } else {
636       if (!baij->donotstash) {
637         if (roworiented) {
638           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
639         } else {
640           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
641         }
642       }
643     }
644   }
645   if (PetscDefined(USE_DEBUG)) {
646     baij->ht_total_ct += total_ct;
647     baij->ht_insert_ct += insert_ct;
648   }
649   PetscFunctionReturn(PETSC_SUCCESS);
650 }
651 
/*
   MatGetValues_MPIBAIJ - Retrieves values at global POINT indices from the locally
   owned rows of a parallel BAIJ matrix into the dense row-major array v[m*n].
   Only local rows are supported; columns outside the diagonal block are translated
   through colmap into the off-diagonal block B, and absent entries come back as 0.0.
*/
static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) { /* column in the diagonal block A */
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else { /* column in the off-diagonal block B: translate via colmap (shifted by one) */
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
          data--;
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          /* data < 0 means the block column is absent; the garray check guards against
             a stale colmap entry pointing at a different global block column */
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs; /* local scalar column inside B */
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
689 
690 static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
691 {
692   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
693   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
694   PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
695   PetscReal    sum = 0.0;
696   MatScalar   *v;
697 
698   PetscFunctionBegin;
699   if (baij->size == 1) {
700     PetscCall(MatNorm(baij->A, type, nrm));
701   } else {
702     if (type == NORM_FROBENIUS) {
703       v  = amat->a;
704       nz = amat->nz * bs2;
705       for (i = 0; i < nz; i++) {
706         sum += PetscRealPart(PetscConj(*v) * (*v));
707         v++;
708       }
709       v  = bmat->a;
710       nz = bmat->nz * bs2;
711       for (i = 0; i < nz; i++) {
712         sum += PetscRealPart(PetscConj(*v) * (*v));
713         v++;
714       }
715       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
716       *nrm = PetscSqrtReal(*nrm);
717     } else if (type == NORM_1) { /* max column sum */
718       PetscReal  *tmp, *tmp2;
719       PetscInt   *jj, *garray = baij->garray, cstart = baij->rstartbs;
720       PetscMPIInt iN;
721 
722       PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
723       PetscCall(PetscMalloc1(mat->cmap->N, &tmp2));
724       v  = amat->a;
725       jj = amat->j;
726       for (i = 0; i < amat->nz; i++) {
727         for (j = 0; j < bs; j++) {
728           col = bs * (cstart + *jj) + j; /* column index */
729           for (row = 0; row < bs; row++) {
730             tmp[col] += PetscAbsScalar(*v);
731             v++;
732           }
733         }
734         jj++;
735       }
736       v  = bmat->a;
737       jj = bmat->j;
738       for (i = 0; i < bmat->nz; i++) {
739         for (j = 0; j < bs; j++) {
740           col = bs * garray[*jj] + j;
741           for (row = 0; row < bs; row++) {
742             tmp[col] += PetscAbsScalar(*v);
743             v++;
744           }
745         }
746         jj++;
747       }
748       PetscCall(PetscMPIIntCast(mat->cmap->N, &iN));
749       PetscCall(MPIU_Allreduce(tmp, tmp2, iN, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
750       *nrm = 0.0;
751       for (j = 0; j < mat->cmap->N; j++) {
752         if (tmp2[j] > *nrm) *nrm = tmp2[j];
753       }
754       PetscCall(PetscFree(tmp));
755       PetscCall(PetscFree(tmp2));
756     } else if (type == NORM_INFINITY) { /* max row sum */
757       PetscReal *sums;
758       PetscCall(PetscMalloc1(bs, &sums));
759       sum = 0.0;
760       for (j = 0; j < amat->mbs; j++) {
761         for (row = 0; row < bs; row++) sums[row] = 0.0;
762         v  = amat->a + bs2 * amat->i[j];
763         nz = amat->i[j + 1] - amat->i[j];
764         for (i = 0; i < nz; i++) {
765           for (col = 0; col < bs; col++) {
766             for (row = 0; row < bs; row++) {
767               sums[row] += PetscAbsScalar(*v);
768               v++;
769             }
770           }
771         }
772         v  = bmat->a + bs2 * bmat->i[j];
773         nz = bmat->i[j + 1] - bmat->i[j];
774         for (i = 0; i < nz; i++) {
775           for (col = 0; col < bs; col++) {
776             for (row = 0; row < bs; row++) {
777               sums[row] += PetscAbsScalar(*v);
778               v++;
779             }
780           }
781         }
782         for (row = 0; row < bs; row++) {
783           if (sums[row] > sum) sum = sums[row];
784         }
785       }
786       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
787       PetscCall(PetscFree(sums));
788     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
789   }
790   PetscFunctionReturn(PETSC_SUCCESS);
791 }
792 
793 /*
794   Creates the hash table, and sets the table
795   This table is created only once.
796   If new entried need to be added to the matrix
797   then the hash table has to be destroyed and
798   recreated.
799 */
800 static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
801 {
802   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
803   Mat          A = baij->A, B = baij->B;
804   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
805   PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
806   PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
807   PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
808   PetscInt    *HT, key;
809   MatScalar  **HD;
810   PetscReal    tmp;
811 #if defined(PETSC_USE_INFO)
812   PetscInt ct = 0, max = 0;
813 #endif
814 
815   PetscFunctionBegin;
816   if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS);
817 
818   baij->ht_size = (PetscInt)(factor * nz);
819   ht_size       = baij->ht_size;
820 
821   /* Allocate Memory for Hash Table */
822   PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
823   HD = baij->hd;
824   HT = baij->ht;
825 
826   /* Loop Over A */
827   for (i = 0; i < a->mbs; i++) {
828     for (j = ai[i]; j < ai[i + 1]; j++) {
829       row = i + rstart;
830       col = aj[j] + cstart;
831 
832       key = row * Nbs + col + 1;
833       h1  = HASH(ht_size, key, tmp);
834       for (k = 0; k < ht_size; k++) {
835         if (!HT[(h1 + k) % ht_size]) {
836           HT[(h1 + k) % ht_size] = key;
837           HD[(h1 + k) % ht_size] = a->a + j * bs2;
838           break;
839 #if defined(PETSC_USE_INFO)
840         } else {
841           ct++;
842 #endif
843         }
844       }
845 #if defined(PETSC_USE_INFO)
846       if (k > max) max = k;
847 #endif
848     }
849   }
850   /* Loop Over B */
851   for (i = 0; i < b->mbs; i++) {
852     for (j = bi[i]; j < bi[i + 1]; j++) {
853       row = i + rstart;
854       col = garray[bj[j]];
855       key = row * Nbs + col + 1;
856       h1  = HASH(ht_size, key, tmp);
857       for (k = 0; k < ht_size; k++) {
858         if (!HT[(h1 + k) % ht_size]) {
859           HT[(h1 + k) % ht_size] = key;
860           HD[(h1 + k) % ht_size] = b->a + j * bs2;
861           break;
862 #if defined(PETSC_USE_INFO)
863         } else {
864           ct++;
865 #endif
866         }
867       }
868 #if defined(PETSC_USE_INFO)
869       if (k > max) max = k;
870 #endif
871     }
872   }
873 
874   /* Print Summary */
875 #if defined(PETSC_USE_INFO)
876   for (i = 0, j = 0; i < ht_size; i++) {
877     if (HT[i]) j++;
878   }
879   PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max));
880 #endif
881   PetscFunctionReturn(PETSC_SUCCESS);
882 }
883 
884 static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
885 {
886   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
887   PetscInt     nstash, reallocs;
888 
889   PetscFunctionBegin;
890   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
891 
892   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
893   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
894   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
895   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
896   PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
897   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
898   PetscFunctionReturn(PETSC_SUCCESS);
899 }
900 
/*
  Completes assembly of an MPIBAIJ matrix: drains both stashes of received
  off-process entries into the local parts, assembles the sequential A and
  B blocks, participates in the collective disassembly handshake, builds
  the matrix-vector scatter on first final assembly, and (optionally)
  switches MatSetValues() to the hash-table fast path.
*/
static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
  PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt    *row, *col;
  PetscBool    r1, r2, r3, other_disassembled;
  MatScalar   *val;
  PetscMPIInt  n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* drain the point stash: each received message is cut into runs of
       entries that share a row so each run is inserted with one call */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the row-oriented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented                           = PETSC_FALSE;
    a->roworiented                              = PETSC_FALSE;
    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE;
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* each stashed block carries bs2 scalars, hence the i*bs2 offset */
        PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented                           = r1;
    a->roworiented                              = r2;
    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3;
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
  }

  /* first final assembly: build the scatter used by MatMult and friends */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* once the pattern is final, optionally build the hash table and switch
     the set-values kernels to the hash-table fast path */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* invalidate MatGetRow() workspace; it is rebuilt on demand */
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1007 
1008 extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer);
1009 #include <petscdraw.h>
/*
  Viewer backend for ASCII, draw, and socket viewers.  The ASCII info
  formats print per-rank summaries only; every other case gathers the
  entire matrix onto rank 0 (as a temporary parallel matrix whose rows all
  live there) and views it with the sequential BAIJ viewer.
*/
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ      *baij = (Mat_MPIBAIJ *)mat->data;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank nonzero and memory statistics plus the scatter used for MatMult */
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, (double)info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      /* nothing to print for factor info on an unfactored parallel matrix */
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqBAIJ *Aloc;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      /* rvals = the bs global point rows of block row i */
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs; /* diagonal-part columns are relative to cstartbs */
        for (k = 0; k < bs; k++) {
          /* insert one point column (bs values) of the block per call */
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = baij->garray[aj[j]] * bs; /* garray maps compressed B columns to global block columns */
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    /* matname is only read when the matrix has a name, i.e. when it was set above */
    if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)A->data)->A, matname));
      PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)A->data)->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1130 
/* Used for both MPIBAIJ and MPISBAIJ matrices.

   Writes the matrix in PETSc's binary AIJ format: a 4-entry header
   (classid, M, N, global nnz), per-point-row lengths, then all column
   indices, then all values.  For each point row the off-diagonal (B)
   columns left of the diagonal block are emitted first, then the diagonal
   (A) columns, then the remaining B columns, so indices come out in
   ascending global order. */
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
  Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt        header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l;
  PetscCount      nz, hnz;
  PetscInt       *rowlens, *colidxs;
  PetscScalar    *matvals;
  PetscMPIInt     rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs * bs * (A->nz + B->nz); /* local point nonzeros (each block holds bs*bs values) */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_COUNT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  /* every point row of block row i has the same length: bs values per stored block */
  for (cnt = 0, i = 0; i < A->mbs; i++)
    for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      /* B columns strictly left of the diagonal block come first ... */
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
      }
      /* ... then the diagonal (A) columns, shifted to global numbering ... */
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
      /* ... then the remaining B columns to the right of the diagonal block */
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscCount_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values (same traversal order as the indices;
     the bs*(bs*j + l) + k offset picks point row k of point column l of block j) */
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
    }
  }
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1210 
1211 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
1212 {
1213   PetscBool iascii, isdraw, issocket, isbinary;
1214 
1215   PetscFunctionBegin;
1216   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1217   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1218   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1219   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1220   if (iascii || isdraw || issocket) {
1221     PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
1222   } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
1223   PetscFunctionReturn(PETSC_SUCCESS);
1224 }
1225 
1226 static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
1227 {
1228   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1229   PetscInt     nt;
1230 
1231   PetscFunctionBegin;
1232   PetscCall(VecGetLocalSize(xx, &nt));
1233   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
1234   PetscCall(VecGetLocalSize(yy, &nt));
1235   PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy");
1236   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1237   PetscCall((*a->A->ops->mult)(a->A, xx, yy));
1238   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1239   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
1240   PetscFunctionReturn(PETSC_SUCCESS);
1241 }
1242 
1243 static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1244 {
1245   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1246 
1247   PetscFunctionBegin;
1248   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1249   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1250   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1251   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1252   PetscFunctionReturn(PETSC_SUCCESS);
1253 }
1254 
1255 static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
1256 {
1257   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1258 
1259   PetscFunctionBegin;
1260   /* do nondiagonal part */
1261   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1262   /* do local part */
1263   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1264   /* add partial results together */
1265   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1266   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1267   PetscFunctionReturn(PETSC_SUCCESS);
1268 }
1269 
1270 static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1271 {
1272   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1273 
1274   PetscFunctionBegin;
1275   /* do nondiagonal part */
1276   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1277   /* do local part */
1278   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1279   /* add partial results together */
1280   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1281   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1282   PetscFunctionReturn(PETSC_SUCCESS);
1283 }
1284 
1285 /*
1286   This only works correctly for square matrices where the subblock A->A is the
1287    diagonal block
1288 */
1289 static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
1290 {
1291   PetscFunctionBegin;
1292   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1293   PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
1294   PetscFunctionReturn(PETSC_SUCCESS);
1295 }
1296 
1297 static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
1298 {
1299   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1300 
1301   PetscFunctionBegin;
1302   PetscCall(MatScale(a->A, aa));
1303   PetscCall(MatScale(a->B, aa));
1304   PetscFunctionReturn(PETSC_SUCCESS);
1305 }
1306 
/*
  Returns one locally owned (global point) row of the matrix with global
  column indices.  The row is assembled by merging, in ascending column
  order, the off-diagonal (B) entries left of the diagonal block, the
  diagonal (A) entries, and the remaining B entries.  Results are copied
  into per-matrix workspace that stays valid until MatRestoreRow().
*/
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;

  PetscFunctionBegin;
  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt     max = 1, mbs = mat->mbs, tmp;
    /* longest block row over both parts, in blocks; workspace is sized in points (x bs2) */
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }
  lrow = row - brstart; /* local point row */

  /* only request from the sequential parts what the caller asked for */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray; /* compressed B block column -> global block column */
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column lies left of the diagonal block */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already determined while copying the values */
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        /* diagonal-part columns are local; shift by the owned column start */
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1389 
1390 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1391 {
1392   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1393 
1394   PetscFunctionBegin;
1395   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
1396   baij->getrowactive = PETSC_FALSE;
1397   PetscFunctionReturn(PETSC_SUCCESS);
1398 }
1399 
1400 static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1401 {
1402   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1403 
1404   PetscFunctionBegin;
1405   PetscCall(MatZeroEntries(l->A));
1406   PetscCall(MatZeroEntries(l->B));
1407   PetscFunctionReturn(PETSC_SUCCESS);
1408 }
1409 
1410 static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1411 {
1412   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
1413   Mat            A = a->A, B = a->B;
1414   PetscLogDouble isend[5], irecv[5];
1415 
1416   PetscFunctionBegin;
1417   info->block_size = (PetscReal)matin->rmap->bs;
1418 
1419   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1420 
1421   isend[0] = info->nz_used;
1422   isend[1] = info->nz_allocated;
1423   isend[2] = info->nz_unneeded;
1424   isend[3] = info->memory;
1425   isend[4] = info->mallocs;
1426 
1427   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1428 
1429   isend[0] += info->nz_used;
1430   isend[1] += info->nz_allocated;
1431   isend[2] += info->nz_unneeded;
1432   isend[3] += info->memory;
1433   isend[4] += info->mallocs;
1434 
1435   if (flag == MAT_LOCAL) {
1436     info->nz_used      = isend[0];
1437     info->nz_allocated = isend[1];
1438     info->nz_unneeded  = isend[2];
1439     info->memory       = isend[3];
1440     info->mallocs      = isend[4];
1441   } else if (flag == MAT_GLOBAL_MAX) {
1442     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1443 
1444     info->nz_used      = irecv[0];
1445     info->nz_allocated = irecv[1];
1446     info->nz_unneeded  = irecv[2];
1447     info->memory       = irecv[3];
1448     info->mallocs      = irecv[4];
1449   } else if (flag == MAT_GLOBAL_SUM) {
1450     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1451 
1452     info->nz_used      = irecv[0];
1453     info->nz_allocated = irecv[1];
1454     info->nz_unneeded  = irecv[2];
1455     info->memory       = irecv[3];
1456     info->mallocs      = irecv[4];
1457   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1458   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1459   info->fill_ratio_needed = 0;
1460   info->factor_mallocs    = 0;
1461   PetscFunctionReturn(PETSC_SUCCESS);
1462 }
1463 
1464 static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
1465 {
1466   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1467 
1468   PetscFunctionBegin;
1469   switch (op) {
1470   case MAT_NEW_NONZERO_LOCATIONS:
1471   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1472   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1473   case MAT_KEEP_NONZERO_PATTERN:
1474   case MAT_NEW_NONZERO_LOCATION_ERR:
1475     MatCheckPreallocated(A, 1);
1476     PetscCall(MatSetOption(a->A, op, flg));
1477     PetscCall(MatSetOption(a->B, op, flg));
1478     break;
1479   case MAT_ROW_ORIENTED:
1480     MatCheckPreallocated(A, 1);
1481     a->roworiented = flg;
1482 
1483     PetscCall(MatSetOption(a->A, op, flg));
1484     PetscCall(MatSetOption(a->B, op, flg));
1485     break;
1486   case MAT_FORCE_DIAGONAL_ENTRIES:
1487   case MAT_SORTED_FULL:
1488     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1489     break;
1490   case MAT_IGNORE_OFF_PROC_ENTRIES:
1491     a->donotstash = flg;
1492     break;
1493   case MAT_USE_HASH_TABLE:
1494     a->ht_flag = flg;
1495     a->ht_fact = 1.39;
1496     break;
1497   case MAT_SYMMETRIC:
1498   case MAT_STRUCTURALLY_SYMMETRIC:
1499   case MAT_HERMITIAN:
1500   case MAT_SUBMAT_SINGLEIS:
1501   case MAT_SYMMETRY_ETERNAL:
1502   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1503   case MAT_SPD_ETERNAL:
1504     /* if the diagonal matrix is square it inherits some of the properties above */
1505     break;
1506   default:
1507     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op);
1508   }
1509   PetscFunctionReturn(PETSC_SUCCESS);
1510 }
1511 
/*
   MatTranspose_MPIBAIJ - Form the transpose of an MPIBAIJ matrix by walking the
   raw BAIJ storage of the local diagonal (baij->A) and off-diagonal (baij->B)
   blocks and inserting each bs x bs block, transposed, into the result with
   MatSetValues_MPIBAIJ().

   For MAT_INITIAL_MATRIX/MAT_INPLACE_MATRIX a new matrix B is created without
   preallocation information (only the block size is set); for MAT_REUSE_MATRIX
   the caller-supplied *matout is filled in place.
*/
static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *Aloc;
  Mat          B;
  PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
  PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
  MatScalar   *a;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    /* transposed layout: local/global row and column sizes are swapped */
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    /* Do not know preallocation information, but must set block size */
    PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ *)baij->A->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  PetscCall(PetscMalloc1(bs, &rvals));

  for (i = 0; i < mbs; i++) {
    /* rvals holds the bs consecutive global (scalar) row indices of block-row i */
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      /* aj[] is local to the diagonal block; shift by cstartbs to get global block column */
      col = (baij->cstartbs + aj[j]) * bs;
      for (k = 0; k < bs; k++) {
        /* insert one column of the bs x bs block as a row of B (i.e. transposed) */
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));

        col++;
        a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ *)baij->B->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      /* garray maps the compressed off-diagonal block column to its global block column */
      col = baij->garray[aj[j]] * bs;
      for (k = 0; k < bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
        col++;
        a += bs;
      }
    }
  }
  PetscCall(PetscFree(rvals));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  /* for MAT_INPLACE_MATRIX the new matrix replaces A itself */
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else PetscCall(MatHeaderMerge(A, &B));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1578 
1579 static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr)
1580 {
1581   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1582   Mat          a = baij->A, b = baij->B;
1583   PetscInt     s1, s2, s3;
1584 
1585   PetscFunctionBegin;
1586   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1587   if (rr) {
1588     PetscCall(VecGetLocalSize(rr, &s1));
1589     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1590     /* Overlap communication with computation. */
1591     PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1592   }
1593   if (ll) {
1594     PetscCall(VecGetLocalSize(ll, &s1));
1595     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1596     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1597   }
1598   /* scale  the diagonal block */
1599   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1600 
1601   if (rr) {
1602     /* Do a scatter end and then right scale the off-diagonal block */
1603     PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1604     PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1605   }
1606   PetscFunctionReturn(PETSC_SUCCESS);
1607 }
1608 
/*
   MatZeroRows_MPIBAIJ - Zero the given (global) rows, optionally placing 'diag'
   on the diagonal and adjusting the right-hand side b so that the solution x is
   preserved in the zeroed rows (b[i] = diag * x[i]).
*/
static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
  PetscInt    *lrows; /* local indices of the rows this process owns */
  PetscInt     r, len;
  PetscBool    cong;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  if ((diag != 0.0) && cong) {
    /* square/congruent layout: the sequential routine can set the diagonal directly */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
  } else if (diag != 0.0) {
    /* non-congruent layouts: zero first, then insert diagonal entries one by one,
       which requires that new nonzero locations are allowed */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
    PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options MAT_NEW_NONZERO_LOCATIONS, MAT_NEW_NONZERO_LOCATION_ERR, and MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  } else {
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)l->A->data)->keepnonzeropattern || !((Mat_SeqBAIJ *)l->A->data)->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1665 
/*
   MatZeroRowsColumns_MPIBAIJ - Zero the given global rows AND the corresponding
   columns, optionally setting 'diag' on the diagonal and updating b for a known
   solution x.

   The row list may name rows owned by other processes, so an SF reduction first
   communicates which locally owned rows are affected.  The diagonal block is
   handled by MatZeroRowsColumns() on l->A; the off-diagonal block l->B is fixed
   up by hand on its raw BAIJ arrays, using a scattered 0/1 mask vector to know
   which ghost columns must be zeroed.
*/
static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
  PetscMPIInt        n, p = 0;
  PetscInt           i, j, k, r, len = 0, row, col, count;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask;
  Vec                xmask, lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  PetscCall(PetscMPIIntCast(A->rmap->n, &n));
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1; /* -1 marks "not zeroed"; any row index >= 0 reduced in marks it */
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off-diagonal part of matrix */
  /* build a 0/1 mask over ghost columns: 1 where the column must be zeroed */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x) {
    /* bring in the ghost values of x needed to update b for zeroed off-process columns */
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off-diagonal matrix */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    /* number of scalar entries stored for this scalar row of the off-diagonal block */
    count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
    /* point at this row's first entry inside the bs x bs blocks; stride bs walks along the row */
    aa    = ((MatScalar *)baij->a) + baij->i[row / bs] * bs2 + (row % bs);
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa += bs;
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i / bs;
    for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs * baij->j[j] + k;
        if (PetscAbsScalar(mask[col])) {
          aa = ((MatScalar *)baij->a) + j * bs2 + (i % bs) + bs * k;
          if (x) bb[i] -= aa[0] * xx[col]; /* move the known contribution to the rhs before zeroing */
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)l->A->data)->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1762 
1763 static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1764 {
1765   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1766 
1767   PetscFunctionBegin;
1768   PetscCall(MatSetUnfactored(a->A));
1769   PetscFunctionReturn(PETSC_SUCCESS);
1770 }
1771 
1772 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);
1773 
1774 static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag)
1775 {
1776   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
1777   Mat          a, b, c, d;
1778   PetscBool    flg;
1779 
1780   PetscFunctionBegin;
1781   a = matA->A;
1782   b = matA->B;
1783   c = matB->A;
1784   d = matB->B;
1785 
1786   PetscCall(MatEqual(a, c, &flg));
1787   if (flg) PetscCall(MatEqual(b, d, &flg));
1788   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1789   PetscFunctionReturn(PETSC_SUCCESS);
1790 }
1791 
1792 static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str)
1793 {
1794   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1795   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
1796 
1797   PetscFunctionBegin;
1798   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1799   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1800     PetscCall(MatCopy_Basic(A, B, str));
1801   } else {
1802     PetscCall(MatCopy(a->A, b->A, str));
1803     PetscCall(MatCopy(a->B, b->B, str));
1804   }
1805   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1806   PetscFunctionReturn(PETSC_SUCCESS);
1807 }
1808 
1809 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
1810 {
1811   PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs;
1812   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
1813   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
1814 
1815   PetscFunctionBegin;
1816   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
1817   PetscFunctionReturn(PETSC_SUCCESS);
1818 }
1819 
/*
   MatAXPY_MPIBAIJ - Compute Y = a*X + Y.

   SAME_NONZERO_PATTERN uses direct BLAS axpy on the raw value arrays of the
   diagonal and off-diagonal blocks; SUBSET_NONZERO_PATTERN defers to the
   generic implementation; otherwise a new matrix with the union pattern is
   preallocated, filled, and merged back into Y.
*/
static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
  PetscBLASInt bnz, one                         = 1;
  Mat_SeqBAIJ *x, *y;
  PetscInt     bs2 = Y->rmap->bs * Y->rmap->bs; /* scalars per bs x bs block */

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* identical patterns: axpy the value arrays of both blocks directly */
    x                 = (Mat_SeqBAIJ *)xx->A->data;
    y                 = (Mat_SeqBAIJ *)yy->A->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    x = (Mat_SeqBAIJ *)xx->B->data;
    y = (Mat_SeqBAIJ *)yy->B->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    /* different patterns: build a matrix B preallocated for the union pattern,
       compute B = a*X + Y into it, then replace Y's innards with B's */
    Mat       B;
    PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
    PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
    PetscCall(MatSetType(B, MATMPIBAIJ));
    PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
    /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1862 
1863 static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
1864 {
1865   PetscFunctionBegin;
1866   if (PetscDefined(USE_COMPLEX)) {
1867     Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;
1868 
1869     PetscCall(MatConjugate_SeqBAIJ(a->A));
1870     PetscCall(MatConjugate_SeqBAIJ(a->B));
1871   }
1872   PetscFunctionReturn(PETSC_SUCCESS);
1873 }
1874 
1875 static PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1876 {
1877   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1878 
1879   PetscFunctionBegin;
1880   PetscCall(MatRealPart(a->A));
1881   PetscCall(MatRealPart(a->B));
1882   PetscFunctionReturn(PETSC_SUCCESS);
1883 }
1884 
1885 static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1886 {
1887   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1888 
1889   PetscFunctionBegin;
1890   PetscCall(MatImaginaryPart(a->A));
1891   PetscCall(MatImaginaryPart(a->B));
1892   PetscFunctionReturn(PETSC_SUCCESS);
1893 }
1894 
1895 static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1896 {
1897   IS       iscol_local;
1898   PetscInt csize;
1899 
1900   PetscFunctionBegin;
1901   PetscCall(ISGetLocalSize(iscol, &csize));
1902   if (call == MAT_REUSE_MATRIX) {
1903     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1904     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1905   } else {
1906     PetscCall(ISAllGather(iscol, &iscol_local));
1907   }
1908   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE));
1909   if (call == MAT_INITIAL_MATRIX) {
1910     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1911     PetscCall(ISDestroy(&iscol_local));
1912   }
1913   PetscFunctionReturn(PETSC_SUCCESS);
1914 }
1915 
1916 /*
1917   Not great since it makes two copies of the submatrix, first an SeqBAIJ
1918   in local and then by concatenating the local matrices the end result.
1919   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1920   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
1921 */
/*
   MatCreateSubMatrix_MPIBAIJ_Private - Build a parallel submatrix from row and
   (gathered) column index sets.

   The scalar index sets are first compressed to block index sets; each process
   then extracts its local piece as a sequential matrix (Mreuse) and the pieces
   are reassembled into the parallel result M.  'csize' gives the requested
   local column size (PETSC_DECIDE to let this routine choose); 'sym' selects
   the SBAIJ code path used when this routine is called for symmetric formats.
*/
PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym)
{
  PetscMPIInt  rank, size;
  PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
  PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat          M, Mreuse;
  MatScalar   *vwork, *aa;
  MPI_Comm     comm;
  IS           isrow_new, iscol_new;
  Mat_SeqBAIJ *aij;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* The compression and expansion should be avoided. Doesn't point
     out errors, might change the indices, hence buggy */
  /* convert scalar index sets to block index sets */
  PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
  if (isrow == iscol) {
    iscol_new = isrow_new;
    PetscCall(PetscObjectReference((PetscObject)iscol_new));
  } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));

  if (call == MAT_REUSE_MATRIX) {
    /* reuse the cached local sequential submatrix from the initial call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym));
  } else {
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym));
  }
  PetscCall(ISDestroy(&isrow_new));
  PetscCall(ISDestroy(&iscol_new));
  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetSize(Mreuse, &m, &n));
  /* convert scalar sizes to block sizes */
  m = m / bs;
  n = n / bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n * bs) { /* square matrix */
        nlocal = m;
      } else {
        /* spread the n block columns roughly evenly over the processes */
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize / bs;
    }
    /* prefix-sum the local column counts to get this process's column range */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        /* columns inside [rstart,rend) belong to the diagonal block */
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
    PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ));
    /* both preallocations are attempted; presumably only the one matching M's
       actual type takes effect — NOTE(review): verify against the Set methods */
    PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(PetscFree2(dlens, olens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqBAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  /* insert the local sequential submatrix block-row by block-row */
  for (i = 0; i < m; i++) {
    row   = rstart / bs + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj    = PetscSafePointerPlusOffset(jj, nz);
    vwork = aa;
    aa    = PetscSafePointerPlusOffset(aa, nz * bs * bs);
    PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(PetscObjectDereference((PetscObject)Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2050 
2051 static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B)
2052 {
2053   MPI_Comm        comm, pcomm;
2054   PetscInt        clocal_size, nrows;
2055   const PetscInt *rows;
2056   PetscMPIInt     size;
2057   IS              crowp, lcolp;
2058 
2059   PetscFunctionBegin;
2060   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2061   /* make a collective version of 'rowp' */
2062   PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
2063   if (pcomm == comm) {
2064     crowp = rowp;
2065   } else {
2066     PetscCall(ISGetSize(rowp, &nrows));
2067     PetscCall(ISGetIndices(rowp, &rows));
2068     PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
2069     PetscCall(ISRestoreIndices(rowp, &rows));
2070   }
2071   PetscCall(ISSetPermutation(crowp));
2072   /* make a local version of 'colp' */
2073   PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
2074   PetscCallMPI(MPI_Comm_size(pcomm, &size));
2075   if (size == 1) {
2076     lcolp = colp;
2077   } else {
2078     PetscCall(ISAllGather(colp, &lcolp));
2079   }
2080   PetscCall(ISSetPermutation(lcolp));
2081   /* now we just get the submatrix */
2082   PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
2083   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE));
2084   /* clean up */
2085   if (pcomm != comm) PetscCall(ISDestroy(&crowp));
2086   if (size > 1) PetscCall(ISDestroy(&lcolp));
2087   PetscFunctionReturn(PETSC_SUCCESS);
2088 }
2089 
2090 static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
2091 {
2092   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
2093   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
2094 
2095   PetscFunctionBegin;
2096   if (nghosts) *nghosts = B->nbs;
2097   if (ghosts) *ghosts = baij->garray;
2098   PetscFunctionReturn(PETSC_SUCCESS);
2099 }
2100 
/*
   MatGetSeqNonzeroStructure_MPIBAIJ - Build, on every process, a sequential
   AIJ matrix holding the block nonzero structure of the entire parallel
   matrix (one row/column per block; numerical values are not set).

   Row lengths and then column indices are gathered to all processes with
   MPI_Allgatherv; each process writes its own contribution in place
   (MPI_IN_PLACE) with the global column indices in sorted order per row.
*/
static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat)
{
  Mat          B;
  Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
  Mat_SeqAIJ  *b;
  PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
  PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
  PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));

  /*   Tell every processor the number of nonzeros per row  */
  PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
  /* local block-row length = diagonal-block nz + off-diagonal-block nz */
  for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
  /* one allocation serves both recvcounts and displs */
  PetscCall(PetscMalloc1(2 * size, &recvcounts));
  displs = recvcounts + size;
  for (i = 0; i < size; i++) {
    PetscCall(PetscMPIIntCast(A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs, &recvcounts[i]));
    PetscCall(PetscMPIIntCast(A->rmap->range[i] / bs, &displs[i]));
  }
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /* Create the sequential matrix of the same type as the local block diagonal  */
  PetscCall(MatCreate(PETSC_COMM_SELF, &B));
  PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(B, MATSEQAIJ));
  PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
  b = (Mat_SeqAIJ *)B->data;

  /*     Copy my part of matrix column indices over  */
  sendcount  = ad->nz + bd->nz;
  /* write directly into B's j array at the offset of this process's rows */
  jsendbuf   = b->j + b->i[rstarts[rank] / bs];
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend / bs - A->rmap->rstart / bs;
  cnt        = 0;
  for (i = 0; i < n; i++) {
    /* put in lower diagonal portion */
    m = bd->i[i + 1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;

    /* put in upper diagonal portion */
    while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
  }
  PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);

  /*  Gather all column indices to all processors  */
  for (i = 0; i < size; i++) {
    recvcounts[i] = 0;
    for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
  }
  displs[0] = 0;
  for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /*  Assemble the matrix into usable form (note numerical values not yet set)  */
  /* set the b->ilen (length of each row) values */
  PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
  /* set the b->i indices */
  b->i[0] = 0;
  for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
  PetscCall(PetscFree(lens));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscFree(recvcounts));

  PetscCall(MatPropagateSymmetryOptions(A, B));
  *newmat = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2180 
/*
   MatSOR_MPIBAIJ - Parallel (processor-local) SOR relaxation.

   Only the "local" sweep variants are supported: each outer iteration scatters
   the current solution's ghost values, forms bb1 = bb - B*x (moving the
   off-process coupling to the right-hand side), and runs the corresponding
   sequential sweep on the diagonal block.  True parallel sweeps are not
   implemented.
*/
static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  Vec          bb1 = NULL; /* work vector holding bb - B*x; allocated only when needed */

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* bb1 is needed unless this is a single iteration starting from a zero guess
     (note ~flag & SOR_ZERO_INITIAL_GUESS tests that the bit is NOT set) */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero guess the off-process contribution is zero, so the first
         sweep can use bb directly */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported");

  PetscCall(VecDestroy(&bb1));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2248 
/*
  MatGetColumnReductions_MPIBAIJ - Computes a per-column reduction over the whole parallel matrix:
  a column norm (NORM_1, NORM_2, NORM_INFINITY) or the column sum/mean of real or imaginary parts.

  Input Parameters:
+ A    - the MATMPIBAIJ matrix
- type - NORM_1, NORM_2, NORM_INFINITY, or one of REDUCTION_SUM_REALPART, REDUCTION_MEAN_REALPART,
         REDUCTION_SUM_IMAGINARYPART, REDUCTION_MEAN_IMAGINARYPART

  Output Parameter:
. reductions - caller-provided array of global length N (number of columns) receiving the result

  Notes:
  Walks the raw storage of the local diagonal (aij->A) and off-diagonal (aij->B) SeqBAIJ parts.
  The outer loop runs over the nonzero bs-by-bs blocks of all local block rows
  (a_aij->i[mbs] is one past the last block); as the inner loops show, consecutive stored
  values within a block share a block column jb while the block row ib varies, so values
  are consumed in column-major order within each block.  Diagonal-part block columns are
  shifted by A->cmap->rstart to global point columns; off-diagonal block columns are mapped
  to global block columns through garray.  Per-process partial results in `work` are then
  combined with an MPI reduction: MAX for the infinity norm, SUM for everything else.
*/
static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data;
  PetscInt     m, N, i, *garray = aij->garray; /* garray: global block-column index of each local off-diagonal block column */
  PetscInt     ib, jb, bs = A->rmap->bs;
  Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data;
  MatScalar   *a_val = a_aij->a; /* cursor over the diagonal part's stored values */
  Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data;
  MatScalar   *b_val = b_aij->a; /* cursor over the off-diagonal part's stored values */
  PetscReal   *work;
  PetscMPIInt  iN;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &N));
  PetscCall(PetscCalloc1(N, &work)); /* zero-initialized per-process partial result, one slot per global column */
  if (type == NORM_2) {
    /* accumulate |a|^2 per column; square root is taken after the global reduction */
    for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
          a_val++;
        }
      }
    }
    for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
          b_val++;
        }
      }
    }
  } else if (type == NORM_1) {
    /* accumulate |a| per column */
    for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
          a_val++;
        }
      }
    }
    for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
          b_val++;
        }
      }
    }
  } else if (type == NORM_INFINITY) {
    /* track the largest |a| per column */
    for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
          work[col]    = PetscMax(PetscAbsScalar(*a_val), work[col]);
          a_val++;
        }
      }
    }
    for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          PetscInt col = garray[b_aij->j[i]] * bs + jb;
          work[col]    = PetscMax(PetscAbsScalar(*b_val), work[col]);
          b_val++;
        }
      }
    }
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    /* accumulate Re(a) per column; mean is obtained by dividing after the reduction */
    for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
          a_val++;
        }
      }
    }
    for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
          b_val++;
        }
      }
    }
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    /* accumulate Im(a) per column; mean is obtained by dividing after the reduction */
    for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
          a_val++;
        }
      }
    }
    for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
          b_val++;
        }
      }
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  PetscCall(PetscMPIIntCast(N, &iN));
  /* combine the per-process partials into the caller's array */
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, iN, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, iN, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    /* finish the 2-norm: sqrt of the summed squares */
    for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    /* finish the mean: divide the column sums by the global number of rows m */
    for (i = 0; i < N; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2366 
2367 static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values)
2368 {
2369   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2370 
2371   PetscFunctionBegin;
2372   PetscCall(MatInvertBlockDiagonal(a->A, values));
2373   A->factorerrortype             = a->A->factorerrortype;
2374   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2375   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2376   PetscFunctionReturn(PETSC_SUCCESS);
2377 }
2378 
2379 static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a)
2380 {
2381   Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
2382   Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ *)maij->A->data;
2383 
2384   PetscFunctionBegin;
2385   if (!Y->preallocated) {
2386     PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
2387   } else if (!aij->nz) {
2388     PetscInt nonew = aij->nonew;
2389     PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
2390     aij->nonew = nonew;
2391   }
2392   PetscCall(MatShift_Basic(Y, a));
2393   PetscFunctionReturn(PETSC_SUCCESS);
2394 }
2395 
2396 static PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d)
2397 {
2398   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2399 
2400   PetscFunctionBegin;
2401   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2402   PetscCall(MatMissingDiagonal(a->A, missing, d));
2403   if (d) {
2404     PetscInt rstart;
2405     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2406     *d += rstart / A->rmap->bs;
2407   }
2408   PetscFunctionReturn(PETSC_SUCCESS);
2409 }
2410 
2411 static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a)
2412 {
2413   PetscFunctionBegin;
2414   *a = ((Mat_MPIBAIJ *)A->data)->A;
2415   PetscFunctionReturn(PETSC_SUCCESS);
2416 }
2417 
2418 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep)
2419 {
2420   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2421 
2422   PetscFunctionBegin;
2423   PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2424   PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2425   PetscFunctionReturn(PETSC_SUCCESS);
2426 }
2427 
/* Operation (dispatch) table installed on every MATMPIBAIJ matrix.  Entries are positional:
   the /*NN* / markers give the slot index within struct _MatOps, so entries must never be
   reordered.  A NULL slot means the operation is unsupported for this type (or handled by
   generic fallbacks elsewhere). */
static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
                                       MatGetRow_MPIBAIJ,
                                       MatRestoreRow_MPIBAIJ,
                                       MatMult_MPIBAIJ,
                                       /* 4*/ MatMultAdd_MPIBAIJ,
                                       MatMultTranspose_MPIBAIJ,
                                       MatMultTransposeAdd_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIBAIJ,
                                       MatTranspose_MPIBAIJ,
                                       /*15*/ MatGetInfo_MPIBAIJ,
                                       MatEqual_MPIBAIJ,
                                       MatGetDiagonal_MPIBAIJ,
                                       MatDiagonalScale_MPIBAIJ,
                                       MatNorm_MPIBAIJ,
                                       /*20*/ MatAssemblyBegin_MPIBAIJ,
                                       MatAssemblyEnd_MPIBAIJ,
                                       MatSetOption_MPIBAIJ,
                                       MatZeroEntries_MPIBAIJ,
                                       /*24*/ MatZeroRows_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIBAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIBAIJ,
                                       MatCreateSubMatrices_MPIBAIJ,
                                       MatIncreaseOverlap_MPIBAIJ,
                                       MatGetValues_MPIBAIJ,
                                       MatCopy_MPIBAIJ,
                                       /*44*/ NULL,
                                       MatScale_MPIBAIJ,
                                       MatShift_MPIBAIJ,
                                       NULL,
                                       MatZeroRowsColumns_MPIBAIJ,
                                       /*49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIBAIJ,
                                       MatPermute_MPIBAIJ,
                                       MatSetValuesBlocked_MPIBAIJ,
                                       /*59*/ MatCreateSubMatrix_MPIBAIJ,
                                       MatDestroy_MPIBAIJ,
                                       MatView_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*74*/ NULL,
                                       MatFDColoringApply_BAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPIBAIJ,
                                       /*84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*99*/ NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIBAIJ,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIBAIJ,
                                       MatImaginaryPart_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMissingDiagonal_MPIBAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ,
                                       NULL,
                                       MatGetGhosts_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIBAIJ,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIBAIJ,
                                       MatInvertBlockDiagonal_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_Default,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       NULL,
                                       /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIBAIJ,
                                       MatGetRowSumAbs_MPIBAIJ,
                                       NULL};
2582 
2583 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
2584 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
2585 
/*
  MatMPIBAIJSetPreallocationCSR_MPIBAIJ - Type-specific implementation of
  MatMPIBAIJSetPreallocationCSR(): defines the block nonzero pattern of B from block-CSR
  arrays (ii, jj), where jj holds global block-column indices for this rank's block rows,
  and optionally inserts the numerical values V.

  The routine (1) counts, per local block row, how many blocks fall in the diagonal
  block-column range [cstart, cend) versus off-process columns and preallocates exactly;
  (2) inserts the values (or zeros when V is NULL) with MatSetValuesBlocked_MPIBAIJ();
  and (3) assembles the matrix without inter-process communication, since by construction
  every entry is locally owned.
*/
static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
{
  PetscInt        m, rstart, cstart, cend;
  PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
  const PetscInt *JJ          = NULL;
  PetscScalar    *values      = NULL;
  PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
  PetscBool       nooffprocentries;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
  PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
  /* convert the point layout into block quantities */
  m      = B->rmap->n / bs;
  rstart = B->rmap->rstart / bs;
  cstart = B->cmap->rstart / bs;
  cend   = B->cmap->rend / bs;

  PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
  /* count diagonal- vs off-diagonal-part blocks per block row for exact preallocation */
  PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
  for (i = 0; i < m; i++) {
    nz = ii[i + 1] - ii[i];
    PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
    nz_max = PetscMax(nz_max, nz); /* widest row; sizes the zero scratch buffer below */
    dlen   = 0;
    olen   = 0;
    JJ     = jj + ii[i];
    for (j = 0; j < nz; j++) {
      if (*JJ < cstart || *JJ >= cend) olen++;
      else dlen++;
      JJ++;
    }
    d_nnz[i] = dlen;
    o_nnz[i] = olen;
  }
  PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  /* with no values supplied, insert zeros from a scratch buffer big enough for the widest row */
  values = (PetscScalar *)V;
  if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
  for (i = 0; i < m; i++) {
    PetscInt        row   = i + rstart;
    PetscInt        ncols = ii[i + 1] - ii[i];
    const PetscInt *icols = jj + ii[i];
    if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
      PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
    } else { /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j = 0; j < ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
        PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
      }
    }
  }

  if (!V) PetscCall(PetscFree(values));
  /* every inserted entry is locally owned, so assembly may skip off-process communication */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* the CSR pattern is exact: any later insertion outside it is an error */
  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2654 
2655 /*@C
2656   MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values
2657 
2658   Collective
2659 
2660   Input Parameters:
2661 + B  - the matrix
2662 . bs - the block size
2663 . i  - the indices into `j` for the start of each local row (starts with zero)
2664 . j  - the column indices for each local row (starts with zero) these must be sorted for each row
2665 - v  - optional values in the matrix, use `NULL` if not provided
2666 
2667   Level: advanced
2668 
2669   Notes:
2670   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
2671   thus you CANNOT change the matrix entries by changing the values of `v` after you have
2672   called this routine.
2673 
2674   The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
2675   may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
2676   over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2677   `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2678   block column and the second index is over columns within a block.
2679 
2680   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2681 
2682 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ`
2683 @*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* validate arguments, then dispatch to the type-specific implementation composed on B
     (a no-op for matrix types that do not provide "MatMPIBAIJSetPreallocationCSR_C") */
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2693 
2694 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
2695 {
2696   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
2697   PetscInt     i;
2698   PetscMPIInt  size;
2699 
2700   PetscFunctionBegin;
2701   if (B->hash_active) {
2702     B->ops[0]      = b->cops;
2703     B->hash_active = PETSC_FALSE;
2704   }
2705   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
2706   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
2707   PetscCall(PetscLayoutSetUp(B->rmap));
2708   PetscCall(PetscLayoutSetUp(B->cmap));
2709   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2710 
2711   if (d_nnz) {
2712     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
2713   }
2714   if (o_nnz) {
2715     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
2716   }
2717 
2718   b->bs2 = bs * bs;
2719   b->mbs = B->rmap->n / bs;
2720   b->nbs = B->cmap->n / bs;
2721   b->Mbs = B->rmap->N / bs;
2722   b->Nbs = B->cmap->N / bs;
2723 
2724   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
2725   b->rstartbs = B->rmap->rstart / bs;
2726   b->rendbs   = B->rmap->rend / bs;
2727   b->cstartbs = B->cmap->rstart / bs;
2728   b->cendbs   = B->cmap->rend / bs;
2729 
2730 #if defined(PETSC_USE_CTABLE)
2731   PetscCall(PetscHMapIDestroy(&b->colmap));
2732 #else
2733   PetscCall(PetscFree(b->colmap));
2734 #endif
2735   PetscCall(PetscFree(b->garray));
2736   PetscCall(VecDestroy(&b->lvec));
2737   PetscCall(VecScatterDestroy(&b->Mvctx));
2738 
2739   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2740 
2741   MatSeqXAIJGetOptions_Private(b->B);
2742   PetscCall(MatDestroy(&b->B));
2743   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2744   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2745   PetscCall(MatSetType(b->B, MATSEQBAIJ));
2746   MatSeqXAIJRestoreOptions_Private(b->B);
2747 
2748   MatSeqXAIJGetOptions_Private(b->A);
2749   PetscCall(MatDestroy(&b->A));
2750   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2751   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2752   PetscCall(MatSetType(b->A, MATSEQBAIJ));
2753   MatSeqXAIJRestoreOptions_Private(b->A);
2754 
2755   PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
2756   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
2757   B->preallocated  = PETSC_TRUE;
2758   B->was_assembled = PETSC_FALSE;
2759   B->assembled     = PETSC_FALSE;
2760   PetscFunctionReturn(PETSC_SUCCESS);
2761 }
2762 
2763 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec);
2764 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal);
2765 
/*
  MatConvert_MPIBAIJ_MPIAdj - Builds a MATMPIADJ adjacency graph from the block nonzero
  structure of a MATMPIBAIJ matrix: one vertex per block row, with an edge for every
  nonzero block column except the diagonal block itself.

  Per block row the column list is emitted in ascending global order by merging three
  sorted runs: off-diagonal block columns preceding the local range, local (diagonal-part)
  block columns excluding the diagonal, then the remaining off-diagonal block columns.
  Ownership of ii/jj is transferred to the MPIAdj matrix (no copy flag passed).
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
  Mat_SeqBAIJ    *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data;
  PetscInt        M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  /* row-pointer array: per block row, diagonal-part + off-diagonal-part block counts,
     minus one if the row stores its own diagonal block */
  PetscCall(PetscMalloc1(M + 1, &ii));
  ii[0] = 0;
  for (i = 0; i < M; i++) {
    PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]);
    PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]);
    ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i];
    /* remove one from count of matrix has diagonal */
    for (j = id[i]; j < id[i + 1]; j++) {
      if (jd[j] == i) {
        ii[i + 1]--;
        break;
      }
    }
  }
  PetscCall(PetscMalloc1(ii[M], &jj));
  cnt = 0;
  for (i = 0; i < M; i++) {
    /* off-diagonal columns are stored sorted, so the first run holds all global block
       columns that precede this rank's rows (garray entries never fall inside the local range) */
    for (j = io[i]; j < io[i + 1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
    }
    /* local columns, shifted to global block indices, skipping the diagonal entry */
    for (k = id[i]; k < id[i + 1]; k++) {
      if (jd[k] != i) jj[cnt++] = rstart + jd[k];
    }
    /* remaining off-diagonal columns, all beyond the local range */
    for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]];
  }
  PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2803 
2804 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2805 
2806 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
2807 
/*
  MatConvert_MPIBAIJ_MPIAIJ - Converts a MATMPIBAIJ matrix to MATMPIAIJ by converting the
  sequential diagonal and off-diagonal parts individually and then renumbering the
  off-diagonal part's columns from compressed local indices to global indices.

  Handles all three MatReuse modes: MAT_REUSE_MATRIX reconverts into the existing pieces of
  *newmat; MAT_INITIAL_MATRIX builds a fresh matrix; MAT_INPLACE_MATRIX replaces A's header
  with the converted matrix.  Requires A to be assembled.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  Mat_MPIAIJ  *b;
  Mat          B;

  PetscFunctionBegin;
  PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    /* create the target with matching layout; trivial preallocation since the
       sequential parts are replaced wholesale below */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetType(B, MATMPIAIJ));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
    PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
  }
  b = (Mat_MPIAIJ *)B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    /* reuse the existing sequential parts of B as conversion targets */
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
  } else {
    PetscInt   *garray = a->garray;
    Mat_SeqAIJ *bB;
    PetscInt    bs, nnz;
    PetscCall(MatDestroy(&b->A));
    PetscCall(MatDestroy(&b->B));
    /* just clear out the data structure */
    PetscCall(MatDisAssemble_MPIAIJ(B));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));

    /* Global numbering for b->B columns: a compressed local point column k belongs to
       local block k/bs at offset k%bs, and garray maps that block to its global block index */
    bB  = (Mat_SeqAIJ *)b->B->data;
    bs  = A->rmap->bs;
    nnz = bB->i[A->rmap->n];
    for (PetscInt k = 0; k < nnz; k++) {
      PetscInt bj = bB->j[k] / bs;
      PetscInt br = bB->j[k] % bs;
      bB->j[k]    = garray[bj] * bs + br;
    }
  }
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2863 
2864 /*MC
2865    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2866 
2867    Options Database Keys:
2868 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
2869 . -mat_block_size <bs> - set the blocksize used to store the matrix
2870 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use  (0 often indicates using BLAS)
2871 - -mat_use_hash_table <fact> - set hash table factor
2872 
2873    Level: beginner
2874 
2875    Note:
    `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
2878 
2879 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ`
2880 M*/
2881 
2882 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);
2883 
2884 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2885 {
2886   Mat_MPIBAIJ *b;
2887   PetscBool    flg = PETSC_FALSE;
2888 
2889   PetscFunctionBegin;
2890   PetscCall(PetscNew(&b));
2891   B->data      = (void *)b;
2892   B->ops[0]    = MatOps_Values;
2893   B->assembled = PETSC_FALSE;
2894 
2895   B->insertmode = NOT_SET_VALUES;
2896   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
2897   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2898 
2899   /* build local table of row and column ownerships */
2900   PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));
2901 
2902   /* build cache for off array entries formed */
2903   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
2904 
2905   b->donotstash  = PETSC_FALSE;
2906   b->colmap      = NULL;
2907   b->garray      = NULL;
2908   b->roworiented = PETSC_TRUE;
2909 
2910   /* stuff used in block assembly */
2911   b->barray = NULL;
2912 
2913   /* stuff used for matrix vector multiply */
2914   b->lvec  = NULL;
2915   b->Mvctx = NULL;
2916 
2917   /* stuff for MatGetRow() */
2918   b->rowindices   = NULL;
2919   b->rowvalues    = NULL;
2920   b->getrowactive = PETSC_FALSE;
2921 
2922   /* hash table stuff */
2923   b->ht           = NULL;
2924   b->hd           = NULL;
2925   b->ht_size      = 0;
2926   b->ht_flag      = PETSC_FALSE;
2927   b->ht_fact      = 0;
2928   b->ht_total_ct  = 0;
2929   b->ht_insert_ct = 0;
2930 
2931   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2932   b->ijonly = PETSC_FALSE;
2933 
2934   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
2935   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
2936   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
2937 #if defined(PETSC_HAVE_HYPRE)
2938   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
2939 #endif
2940   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
2941   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
2942   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
2943   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
2944   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
2945   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
2946   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
2947   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));
2948 
2949   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
2950   PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
2951   if (flg) {
2952     PetscReal fact = 1.39;
2953     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
2954     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
2955     if (fact <= 1.0) fact = 1.39;
2956     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
2957     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
2958   }
2959   PetscOptionsEnd();
2960   PetscFunctionReturn(PETSC_SUCCESS);
2961 }
2962 
2963 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2964 /*MC
2965    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2966 
2967    This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
2968    and `MATMPIBAIJ` otherwise.
2969 
2970    Options Database Keys:
2971 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`
2972 
2973   Level: beginner
2974 
2975 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
2976 M*/
2977 
2978 /*@
2979   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
2980   (block compressed row).
2981 
2982   Collective
2983 
2984   Input Parameters:
2985 + B     - the matrix
2986 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2987           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2988 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2989            submatrix  (same for all local rows)
2990 . d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
2992            row) or `NULL`.  If you plan to factor the matrix you must leave room for the diagonal entry and
2993            set it even if it is zero.
2994 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2995            submatrix (same for all local rows).
2996 - o_nnz - array containing the number of nonzeros in the various block rows of the
2997            off-diagonal portion of the local submatrix (possibly different for
2998            each block row) or `NULL`.
2999 
3000    If the *_nnz parameter is given then the *_nz parameter is ignored
3001 
3002   Options Database Keys:
3003 + -mat_block_size            - size of the blocks to use
3004 - -mat_use_hash_table <fact> - set hash table factor
3005 
3006   Level: intermediate
3007 
3008   Notes:
3009   For good matrix assembly performance
3010   the user should preallocate the matrix storage by setting the parameters
3011   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3012   performance can be increased by more than a factor of 50.
3013 
3014   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
  then it must be used on all processors that share the object for that argument.
3016 
3017   Storage Information:
3018   For a square global matrix we define each processor's diagonal portion
3019   to be its local rows and the corresponding columns (a square submatrix);
3020   each processor's off-diagonal portion encompasses the remainder of the
3021   local matrix (a rectangular submatrix).
3022 
3023   The user can specify preallocated storage for the diagonal part of
3024   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
3025   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3026   memory allocation.  Likewise, specify preallocated storage for the
3027   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3028 
3029   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3030   the figure below we depict these three local rows and all columns (0-11).
3031 
3032 .vb
3033            0 1 2 3 4 5 6 7 8 9 10 11
3034           --------------------------
3035    row 3  |o o o d d d o o o o  o  o
3036    row 4  |o o o d d d o o o o  o  o
3037    row 5  |o o o d d d o o o o  o  o
3038           --------------------------
3039 .ve
3040 
3041   Thus, any entries in the d locations are stored in the d (diagonal)
3042   submatrix, and any entries in the o locations are stored in the
3043   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3044   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3045 
3046   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3047   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3048   In general, for PDE problems in which most nonzeros are near the diagonal,
3049   one expects `d_nz` >> `o_nz`.
3050 
3051   You can call `MatGetInfo()` to get information on how effective the preallocation was;
3052   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3053   You can also run with the option `-info` and look for messages with the string
3054   malloc in them to see if additional memory allocation was needed.
3055 
3056 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
3057 @*/
3058 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
3059 {
3060   PetscFunctionBegin;
3061   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3062   PetscValidType(B, 1);
3063   PetscValidLogicalCollectiveInt(B, bs, 2);
3064   PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
3065   PetscFunctionReturn(PETSC_SUCCESS);
3066 }
3067 
3068 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
3069 /*@
3070   MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
3071   (block compressed row).
3072 
3073   Collective
3074 
3075   Input Parameters:
3076 + comm  - MPI communicator
3077 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3078           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3079 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
3080           This value should be the same as the local size used in creating the
3081           y vector for the matrix-vector product y = Ax.
3082 . n     - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
3083           This value should be the same as the local size used in creating the
3084           x vector for the matrix-vector product y = Ax.
3085 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3086 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3087 . d_nz  - number of nonzero blocks per block row in diagonal portion of local
3088           submatrix  (same for all local rows)
3089 . d_nnz - array containing the number of nonzero blocks in the various block rows
          in the diagonal portion of the local submatrix (possibly different for each block
3091           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3092           and set it even if it is zero.
3093 . o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3094           submatrix (same for all local rows).
3095 - o_nnz - array containing the number of nonzero blocks in the various block rows of the
3096           off-diagonal portion of the local submatrix (possibly different for
3097           each block row) or NULL.
3098 
3099   Output Parameter:
3100 . A - the matrix
3101 
3102   Options Database Keys:
3103 + -mat_block_size            - size of the blocks to use
3104 - -mat_use_hash_table <fact> - set hash table factor
3105 
3106   Level: intermediate
3107 
3108   Notes:
3109   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3110   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3111   [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
3112 
3113   For good matrix assembly performance
3114   the user should preallocate the matrix storage by setting the parameters
3115   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3116   performance can be increased by more than a factor of 50.
3117 
3118   If the *_nnz parameter is given then the *_nz parameter is ignored
3119 
  A nonzero block is any block that has 1 or more nonzeros in it
3121 
3122   The user MUST specify either the local or global matrix dimensions
3123   (possibly both).
3124 
3125   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
  then it must be used on all processors that share the object for that argument.
3127 
3128   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
3129   `MatGetOwnershipRange()`,  `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
3130 
3131   Storage Information:
3132   For a square global matrix we define each processor's diagonal portion
3133   to be its local rows and the corresponding columns (a square submatrix);
3134   each processor's off-diagonal portion encompasses the remainder of the
3135   local matrix (a rectangular submatrix).
3136 
3137   The user can specify preallocated storage for the diagonal part of
3138   the local submatrix with either d_nz or d_nnz (not both).  Set
3139   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3140   memory allocation.  Likewise, specify preallocated storage for the
3141   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3142 
3143   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3144   the figure below we depict these three local rows and all columns (0-11).
3145 
3146 .vb
3147            0 1 2 3 4 5 6 7 8 9 10 11
3148           --------------------------
3149    row 3  |o o o d d d o o o o  o  o
3150    row 4  |o o o d d d o o o o  o  o
3151    row 5  |o o o d d d o o o o  o  o
3152           --------------------------
3153 .ve
3154 
3155   Thus, any entries in the d locations are stored in the d (diagonal)
3156   submatrix, and any entries in the o locations are stored in the
3157   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3158   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3159 
3160   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3161   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3162   In general, for PDE problems in which most nonzeros are near the diagonal,
3163   one expects `d_nz` >> `o_nz`.
3164 
3165 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`,
3166           `MatGetOwnershipRange()`,  `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout`
3167 @*/
3168 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
3169 {
3170   PetscMPIInt size;
3171 
3172   PetscFunctionBegin;
3173   PetscCall(MatCreate(comm, A));
3174   PetscCall(MatSetSizes(*A, m, n, M, N));
3175   PetscCallMPI(MPI_Comm_size(comm, &size));
3176   if (size > 1) {
3177     PetscCall(MatSetType(*A, MATMPIBAIJ));
3178     PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
3179   } else {
3180     PetscCall(MatSetType(*A, MATSEQBAIJ));
3181     PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
3182   }
3183   PetscFunctionReturn(PETSC_SUCCESS);
3184 }
3185 
3186 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
3187 {
3188   Mat          mat;
3189   Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
3190   PetscInt     len = 0;
3191 
3192   PetscFunctionBegin;
3193   *newmat = NULL;
3194   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
3195   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
3196   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
3197 
3198   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
3199   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
3200   if (matin->hash_active) {
3201     PetscCall(MatSetUp(mat));
3202   } else {
3203     mat->factortype   = matin->factortype;
3204     mat->preallocated = PETSC_TRUE;
3205     mat->assembled    = PETSC_TRUE;
3206     mat->insertmode   = NOT_SET_VALUES;
3207 
3208     a             = (Mat_MPIBAIJ *)mat->data;
3209     mat->rmap->bs = matin->rmap->bs;
3210     a->bs2        = oldmat->bs2;
3211     a->mbs        = oldmat->mbs;
3212     a->nbs        = oldmat->nbs;
3213     a->Mbs        = oldmat->Mbs;
3214     a->Nbs        = oldmat->Nbs;
3215 
3216     a->size         = oldmat->size;
3217     a->rank         = oldmat->rank;
3218     a->donotstash   = oldmat->donotstash;
3219     a->roworiented  = oldmat->roworiented;
3220     a->rowindices   = NULL;
3221     a->rowvalues    = NULL;
3222     a->getrowactive = PETSC_FALSE;
3223     a->barray       = NULL;
3224     a->rstartbs     = oldmat->rstartbs;
3225     a->rendbs       = oldmat->rendbs;
3226     a->cstartbs     = oldmat->cstartbs;
3227     a->cendbs       = oldmat->cendbs;
3228 
3229     /* hash table stuff */
3230     a->ht           = NULL;
3231     a->hd           = NULL;
3232     a->ht_size      = 0;
3233     a->ht_flag      = oldmat->ht_flag;
3234     a->ht_fact      = oldmat->ht_fact;
3235     a->ht_total_ct  = 0;
3236     a->ht_insert_ct = 0;
3237 
3238     PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
3239     if (oldmat->colmap) {
3240 #if defined(PETSC_USE_CTABLE)
3241       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3242 #else
3243       PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
3244       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
3245 #endif
3246     } else a->colmap = NULL;
3247 
3248     if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) {
3249       PetscCall(PetscMalloc1(len, &a->garray));
3250       PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3251     } else a->garray = NULL;
3252 
3253     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
3254     PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3255     PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
3256 
3257     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3258     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3259   }
3260   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3261   *newmat = mat;
3262   PetscFunctionReturn(PETSC_SUCCESS);
3263 }
3264 
3265 /* Used for both MPIBAIJ and MPISBAIJ matrices */
3266 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
3267 {
3268   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3269   PetscInt    *rowidxs, *colidxs, rs, cs, ce;
3270   PetscScalar *matvals;
3271 
3272   PetscFunctionBegin;
3273   PetscCall(PetscViewerSetUp(viewer));
3274 
3275   /* read in matrix header */
3276   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3277   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3278   M  = header[1];
3279   N  = header[2];
3280   nz = header[3];
3281   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3282   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3283   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");
3284 
3285   /* set block sizes from the viewer's .info file */
3286   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3287   /* set local sizes if not set already */
3288   if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
3289   if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
3290   /* set global sizes if not set already */
3291   if (mat->rmap->N < 0) mat->rmap->N = M;
3292   if (mat->cmap->N < 0) mat->cmap->N = N;
3293   PetscCall(PetscLayoutSetUp(mat->rmap));
3294   PetscCall(PetscLayoutSetUp(mat->cmap));
3295 
3296   /* check if the matrix sizes are correct */
3297   PetscCall(MatGetSize(mat, &rows, &cols));
3298   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3299   PetscCall(MatGetBlockSize(mat, &bs));
3300   PetscCall(MatGetLocalSize(mat, &m, &n));
3301   PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
3302   PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
3303   mbs = m / bs;
3304   nbs = n / bs;
3305 
3306   /* read in row lengths and build row indices */
3307   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3308   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3309   rowidxs[0] = 0;
3310   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3311   PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3312   PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3313 
3314   /* read in column indices and matrix values */
3315   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3316   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3317   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3318 
3319   {                /* preallocate matrix storage */
3320     PetscBT    bt; /* helper bit set to count diagonal nonzeros */
3321     PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
3322     PetscBool  sbaij, done;
3323     PetscInt  *d_nnz, *o_nnz;
3324 
3325     PetscCall(PetscBTCreate(nbs, &bt));
3326     PetscCall(PetscHSetICreate(&ht));
3327     PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
3328     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
3329     for (i = 0; i < mbs; i++) {
3330       PetscCall(PetscBTMemzero(nbs, bt));
3331       PetscCall(PetscHSetIClear(ht));
3332       for (k = 0; k < bs; k++) {
3333         PetscInt row = bs * i + k;
3334         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3335           PetscInt col = colidxs[j];
3336           if (!sbaij || col >= row) {
3337             if (col >= cs && col < ce) {
3338               if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
3339             } else {
3340               PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
3341               if (done) o_nnz[i]++;
3342             }
3343           }
3344         }
3345       }
3346     }
3347     PetscCall(PetscBTDestroy(&bt));
3348     PetscCall(PetscHSetIDestroy(&ht));
3349     PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
3350     PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
3351     PetscCall(PetscFree2(d_nnz, o_nnz));
3352   }
3353 
3354   /* store matrix values */
3355   for (i = 0; i < m; i++) {
3356     PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
3357     PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
3358   }
3359 
3360   PetscCall(PetscFree(rowidxs));
3361   PetscCall(PetscFree2(colidxs, matvals));
3362   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3363   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3364   PetscFunctionReturn(PETSC_SUCCESS);
3365 }
3366 
3367 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
3368 {
3369   PetscBool isbinary;
3370 
3371   PetscFunctionBegin;
3372   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3373   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3374   PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
3375   PetscFunctionReturn(PETSC_SUCCESS);
3376 }
3377 
3378 /*@
3379   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table
3380 
3381   Input Parameters:
3382 + mat  - the matrix
3383 - fact - factor
3384 
3385   Options Database Key:
3386 . -mat_use_hash_table <fact> - provide the factor
3387 
3388   Level: advanced
3389 
3390 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()`
3391 @*/
3392 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact)
3393 {
3394   PetscFunctionBegin;
3395   PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
3396   PetscFunctionReturn(PETSC_SUCCESS);
3397 }
3398 
3399 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact)
3400 {
3401   Mat_MPIBAIJ *baij;
3402 
3403   PetscFunctionBegin;
3404   baij          = (Mat_MPIBAIJ *)mat->data;
3405   baij->ht_fact = fact;
3406   PetscFunctionReturn(PETSC_SUCCESS);
3407 }
3408 
3409 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
3410 {
3411   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3412   PetscBool    flg;
3413 
3414   PetscFunctionBegin;
3415   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
3416   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
3417   if (Ad) *Ad = a->A;
3418   if (Ao) *Ao = a->B;
3419   if (colmap) *colmap = a->garray;
3420   PetscFunctionReturn(PETSC_SUCCESS);
3421 }
3422 
3423 /*
3424     Special version for direct calls from Fortran (to eliminate two function call overheads
3425 */
3426 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3427   #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3428 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3429   #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3430 #endif
3431 
3432 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name
3433 /*@C
3434   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`
3435 
3436   Collective
3437 
3438   Input Parameters:
3439 + matin  - the matrix
3440 . min    - number of input rows
3441 . im     - input rows
3442 . nin    - number of input columns
3443 . in     - input columns
3444 . v      - numerical values input
3445 - addvin - `INSERT_VALUES` or `ADD_VALUES`
3446 
3447   Level: advanced
3448 
3449   Developer Notes:
3450   This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.
3451 
3452 .seealso: `Mat`, `MatSetValuesBlocked()`
3453 @*/
3454 PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
3455 {
3456   /* convert input arguments to C version */
3457   Mat        mat = *matin;
3458   PetscInt   m = *min, n = *nin;
3459   InsertMode addv = *addvin;
3460 
3461   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ *)mat->data;
3462   const MatScalar *value;
3463   MatScalar       *barray      = baij->barray;
3464   PetscBool        roworiented = baij->roworiented;
3465   PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
3466   PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
3467   PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
3468 
3469   PetscFunctionBegin;
3470   /* tasks normally handled by MatSetValuesBlocked() */
3471   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3472   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
3473   PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
3474   if (mat->assembled) {
3475     mat->was_assembled = PETSC_TRUE;
3476     mat->assembled     = PETSC_FALSE;
3477   }
3478   PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));
3479 
3480   if (!barray) {
3481     PetscCall(PetscMalloc1(bs2, &barray));
3482     baij->barray = barray;
3483   }
3484 
3485   if (roworiented) stepval = (n - 1) * bs;
3486   else stepval = (m - 1) * bs;
3487 
3488   for (i = 0; i < m; i++) {
3489     if (im[i] < 0) continue;
3490     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
3491     if (im[i] >= rstart && im[i] < rend) {
3492       row = im[i] - rstart;
3493       for (j = 0; j < n; j++) {
3494         /* If NumCol = 1 then a copy is not required */
3495         if ((roworiented) && (n == 1)) {
3496           barray = (MatScalar *)v + i * bs2;
3497         } else if ((!roworiented) && (m == 1)) {
3498           barray = (MatScalar *)v + j * bs2;
3499         } else { /* Here a copy is required */
3500           if (roworiented) {
3501             value = v + i * (stepval + bs) * bs + j * bs;
3502           } else {
3503             value = v + j * (stepval + bs) * bs + i * bs;
3504           }
3505           for (ii = 0; ii < bs; ii++, value += stepval) {
3506             for (jj = 0; jj < bs; jj++) *barray++ = *value++;
3507           }
3508           barray -= bs2;
3509         }
3510 
3511         if (in[j] >= cstart && in[j] < cend) {
3512           col = in[j] - cstart;
3513           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
3514         } else if (in[j] < 0) {
3515           continue;
3516         } else {
3517           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
3518           if (mat->was_assembled) {
3519             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
3520 
3521 #if defined(PETSC_USE_DEBUG)
3522   #if defined(PETSC_USE_CTABLE)
3523             {
3524               PetscInt data;
3525               PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
3526               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
3527             }
3528   #else
3529             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
3530   #endif
3531 #endif
3532 #if defined(PETSC_USE_CTABLE)
3533             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
3534             col = (col - 1) / bs;
3535 #else
3536             col = (baij->colmap[in[j]] - 1) / bs;
3537 #endif
3538             if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
3539               PetscCall(MatDisAssemble_MPIBAIJ(mat));
3540               col = in[j];
3541             }
3542           } else col = in[j];
3543           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
3544         }
3545       }
3546     } else {
3547       if (!baij->donotstash) {
3548         if (roworiented) {
3549           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3550         } else {
3551           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3552         }
3553       }
3554     }
3555   }
3556 
3557   /* task normally handled by MatSetValuesBlocked() */
3558   PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
3559   PetscFunctionReturn(PETSC_SUCCESS);
3560 }
3561 
3562 /*@
3563   MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block CSR format for the local rows.
3564 
3565   Collective
3566 
3567   Input Parameters:
3568 + comm - MPI communicator
3569 . bs   - the block size, only a block size of 1 is supported
3570 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
3571 . n    - This value should be the same as the local size used in creating the
3572          x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have
3573          calculated if `N` is given) For square matrices `n` is almost always `m`.
3574 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
3575 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
3576 . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
3577 . j    - column indices
3578 - a    - matrix values
3579 
3580   Output Parameter:
3581 . mat - the matrix
3582 
3583   Level: intermediate
3584 
3585   Notes:
3586   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
3587   thus you CANNOT change the matrix entries by changing the values of a[] after you have
3588   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3589 
3590   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3591   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3592   block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3593   with column-major ordering within blocks.
3594 
3595   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
3596 
3597 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
3598           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
3599 @*/
3600 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
3601 {
3602   PetscFunctionBegin;
3603   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3604   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
3605   PetscCall(MatCreate(comm, mat));
3606   PetscCall(MatSetSizes(*mat, m, n, M, N));
3607   PetscCall(MatSetType(*mat, MATMPIBAIJ));
3608   PetscCall(MatSetBlockSize(*mat, bs));
3609   PetscCall(MatSetUp(*mat));
3610   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
3611   PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
3612   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
3613   PetscFunctionReturn(PETSC_SUCCESS);
3614 }
3615 
3616 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3617 {
3618   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
3619   PetscInt    *indx;
3620   PetscScalar *values;
3621 
3622   PetscFunctionBegin;
3623   PetscCall(MatGetSize(inmat, &m, &N));
3624   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3625     Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data;
3626     PetscInt    *dnz, *onz, mbs, Nbs, nbs;
3627     PetscInt    *bindx, rmax = a->rmax, j;
3628     PetscMPIInt  rank, size;
3629 
3630     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3631     mbs = m / bs;
3632     Nbs = N / cbs;
3633     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
3634     nbs = n / cbs;
3635 
3636     PetscCall(PetscMalloc1(rmax, &bindx));
3637     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
3638 
3639     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3640     PetscCallMPI(MPI_Comm_rank(comm, &size));
3641     if (rank == size - 1) {
3642       /* Check sum(nbs) = Nbs */
3643       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
3644     }
3645 
3646     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
3647     for (i = 0; i < mbs; i++) {
3648       PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
3649       nnz = nnz / bs;
3650       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
3651       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
3652       PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL));
3653     }
3654     PetscCall(PetscFree(bindx));
3655 
3656     PetscCall(MatCreate(comm, outmat));
3657     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
3658     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
3659     PetscCall(MatSetType(*outmat, MATBAIJ));
3660     PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz));
3661     PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
3662     MatPreallocateEnd(dnz, onz);
3663     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3664   }
3665 
3666   /* numeric phase */
3667   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3668   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
3669 
3670   for (i = 0; i < m; i++) {
3671     PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3672     Ii = i + rstart;
3673     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
3674     PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3675   }
3676   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
3677   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
3678   PetscFunctionReturn(PETSC_SUCCESS);
3679 }
3680