xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision e0f5bfbec699682fa3e8b8532b1176849ea4e12a)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) {
10   Mat B;
11 
12   PetscFunctionBegin;
13   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
14   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
15   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
16   PetscCall(MatDestroy(&B));
17   PetscFunctionReturn(0);
18 }
19 
20 PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) {
21   Mat B;
22 
23   PetscFunctionBegin;
24   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
25   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
26   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
27   PetscFunctionReturn(0);
28 }
29 
30 /*MC
31    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
34    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
35   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
41 
42   Developer Note:
43     Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`; the type also automatically switches over to use inodes when
44    enough exist.
45 
46   Level: beginner
47 
48 .seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
49 M*/
50 
51 /*MC
52    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
53 
54    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
55    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
56    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
57   for communicators controlling multiple processes.  It is recommended that you call both of
58   the above preallocation routines for simplicity.
59 
60    Options Database Keys:
61 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`
62 
63   Level: beginner
64 
65 .seealso: `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
66 M*/
67 
68 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg) {
69   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
70 
71   PetscFunctionBegin;
72 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
73   A->boundtocpu = flg;
74 #endif
75   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
76   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
77 
78   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
79    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
80    * to differ from the parent matrix. */
81   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
82   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
83 
84   PetscFunctionReturn(0);
85 }
86 
87 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) {
88   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
89 
90   PetscFunctionBegin;
91   if (mat->A) {
92     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
93     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
94   }
95   PetscFunctionReturn(0);
96 }
97 
/* Create an IS (on the communicator of M) listing the global indices of locally owned
   rows that contain at least one numerically nonzero stored entry.
   Sets *keptrows = NULL and returns early when every row on every process is nonzero. */
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows) {
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data; /* diagonal block */
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data; /* off-diagonal block */
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  /* first pass: cnt = number of local rows that are structurally empty or whose
     stored values are all numerically zero */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) { /* structurally empty row */
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1; /* nonzero found: row is kept, not counted */
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++; /* entries exist but all are numerically zero */
  ok1:;
  }
  /* if no process has any zero row there is nothing to report */
  PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(0);
  }
  /* second pass: collect the m - cnt nonzero rows, shifted to global numbering */
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  /* the IS takes ownership of rows[] (PETSC_OWN_POINTER) */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(0);
}
165 
166 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is) {
167   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
168   PetscBool   cong;
169 
170   PetscFunctionBegin;
171   PetscCall(MatHasCongruentLayouts(Y, &cong));
172   if (Y->assembled && cong) {
173     PetscCall(MatDiagonalSet(aij->A, D, is));
174   } else {
175     PetscCall(MatDiagonalSet_Default(Y, D, is));
176   }
177   PetscFunctionReturn(0);
178 }
179 
180 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows) {
181   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
182   PetscInt    i, rstart, nrows, *rows;
183 
184   PetscFunctionBegin;
185   *zrows = NULL;
186   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
187   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
188   for (i = 0; i < nrows; i++) rows[i] += rstart;
189   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
190   PetscFunctionReturn(0);
191 }
192 
/* Compute a per-column reduction over the whole parallel matrix: 1-, 2-, or
   infinity-norm, or sum/mean of the real or imaginary parts.  reductions[] must
   have length n = global number of columns; every process receives the full result
   (MPI_Allreduce).  Mean reductions divide by m = global number of rows. */
PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions) {
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray; /* garray: local off-diagonal column id -> global column id */
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  /* NOTE(review): get/restore without using the array — presumably this forces any
     device-side values to be synced to the host before the raw a_aij->a / b_aij->a
     accesses below; confirm against MatSeqAIJGetArrayRead() semantics */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  /* accumulate local contributions into work[]: diagonal-block columns are offset by
     cmap->rstart, off-diagonal columns are mapped through garray */
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  /* combine across processes: max for the infinity norm, sum for everything else */
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  /* post-process: square root for the 2-norm, divide by row count for the means */
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}
237 
238 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is) {
239   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
240   IS              sis, gis;
241   const PetscInt *isis, *igis;
242   PetscInt        n, *iis, nsis, ngis, rstart, i;
243 
244   PetscFunctionBegin;
245   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
246   PetscCall(MatFindNonzeroRows(a->B, &gis));
247   PetscCall(ISGetSize(gis, &ngis));
248   PetscCall(ISGetSize(sis, &nsis));
249   PetscCall(ISGetIndices(sis, &isis));
250   PetscCall(ISGetIndices(gis, &igis));
251 
252   PetscCall(PetscMalloc1(ngis + nsis, &iis));
253   PetscCall(PetscArraycpy(iis, igis, ngis));
254   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
255   n = ngis + nsis;
256   PetscCall(PetscSortRemoveDupsInt(&n, iis));
257   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
258   for (i = 0; i < n; i++) iis[i] += rstart;
259   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
260 
261   PetscCall(ISRestoreIndices(sis, &isis));
262   PetscCall(ISRestoreIndices(gis, &igis));
263   PetscCall(ISDestroy(&sis));
264   PetscCall(ISDestroy(&gis));
265   PetscFunctionReturn(0);
266 }
267 
268 /*
269   Local utility routine that creates a mapping from the global column
270 number to the local number in the off-diagonal part of the local
271 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable, at
272 a slightly higher hash-table cost; without it, it is not scalable (each process
273 stores an order-N integer array) but is fast to access.
274 */
/* Build aij->colmap, mapping a global column number to its local column number in the
   off-diagonal block B.  Entries are stored shifted by +1 so that a lookup of 0
   (and hence value-1 < 0 after the caller subtracts one) means "column not in B". */
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n   = aij->B->cmap->n, i; /* number of distinct global columns appearing in B */

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* scalable variant: hash table keyed by global column + 1 */
  PetscCall(PetscTableCreate(n, mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscTableAdd(aij->colmap, aij->garray[i] + 1, i + 1, INSERT_VALUES));
#else
  /* non-scalable variant: dense zero-initialized array of length N (global columns) */
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(0);
}
290 
/* Insert or add one value at local (row, col) of the diagonal block A.  orow/ocol are
   the global indices, used only for error messages.  Relies on (and updates) locals of
   the enclosing MatSetValues_MPIAIJ(): rp1/ap1 (column/value arrays of the current row),
   nrow1, low1/high1 (search window), lastcol1, rmax1, ailen, nonew, ignorezeroentries.
   May grow the matrix storage via MatSeqXAIJReallocateAIJ(). */
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    /* binary search narrows the window, linear scan finishes it off */ \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure whether LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    /* column not present: skip zero off-diagonal insertions when requested */ \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  }
336 
/* Insert or add one value at (row, col) of the off-diagonal block B; same structure as
   MatSetValues_SeqAIJ_A_Private() above but using the B-block locals (rp2/ap2, nrow2,
   low2/high2, lastcol2, rmax2, bilen).  Unlike the A variant, a zero value is skipped
   whenever ignorezeroentries is set (off-diagonal entries are never on the diagonal). */
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    /* binary search narrows the window, linear scan finishes it off */ \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  }
381 
/* Overwrite the stored values of global row 'row' with v[], where v[] lists the
   row's values in global column order: off-diagonal entries left of the diagonal
   block, then the diagonal block, then off-diagonal entries to its right.
   NOTE(review): uses the row ownership start as the column split point, so this
   only works for square matrices (see comment below) — the structure is assumed
   fixed; no new nonzeros are inserted. */
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[]) {
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray                         = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag; /* convert to local row index */
  /* l = number of off-diagonal entries whose global column precedes the diagonal block */
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(0);
}
418 
/* MatSetValues() for MPIAIJ: insert/add the m x n logically dense block v (rows im[],
   columns in[], global indices).  Locally owned rows are routed to the diagonal block
   (columns in [cstart,cend)) or the off-diagonal block (all other columns) via the
   MatSetValues_SeqAIJ_{A,B}_Private macros above; rows owned by other processes are
   stashed for communication during assembly.  Negative row/column indices are ignored. */
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) {
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A     = aij->A;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B                 = aij->B;
  Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are silently skipped */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) { /* locally owned row */
      row      = im[i] - rstart;
      /* initialize the per-row search state consumed by the insertion macros */
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        /* v is either row- or column-major depending on MatSetOption(...MAT_ROW_ORIENTED...) */
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) { /* column in the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else { /* column in the off-diagonal block */
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            /* assembled B uses compacted local column ids; translate through colmap */
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(aij->colmap, in[j] + 1, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
              col   = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              /* new off-diagonal nonzero while insertions are frozen: info or error per nonew */
              if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* before first assembly B uses global column ids directly */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else { /* off-process row: stash for MatAssemblyBegin/End communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(0);
}
526 
527 /*
528     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
529     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
530     No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
531 */
532 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[]) {
533   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
534   Mat         A      = aij->A; /* diagonal part of the matrix */
535   Mat         B      = aij->B; /* offdiagonal part of the matrix */
536   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
537   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
538   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
539   PetscInt   *ailen = a->ilen, *aj = a->j;
540   PetscInt   *bilen = b->ilen, *bj = b->j;
541   PetscInt    am          = aij->A->rmap->n, j;
542   PetscInt    diag_so_far = 0, dnz;
543   PetscInt    offd_so_far = 0, onz;
544 
545   PetscFunctionBegin;
546   /* Iterate over all rows of the matrix */
547   for (j = 0; j < am; j++) {
548     dnz = onz = 0;
549     /*  Iterate over all non-zero columns of the current row */
550     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
551       /* If column is in the diagonal */
552       if (mat_j[col] >= cstart && mat_j[col] < cend) {
553         aj[diag_so_far++] = mat_j[col] - cstart;
554         dnz++;
555       } else { /* off-diagonal entries */
556         bj[offd_so_far++] = mat_j[col];
557         onz++;
558       }
559     }
560     ailen[j] = dnz;
561     bilen[j] = onz;
562   }
563   PetscFunctionReturn(0);
564 }
565 
566 /*
567     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
568     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
569     No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
570     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
571     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
572 */
573 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[]) {
574   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
575   Mat          A    = aij->A; /* diagonal part of the matrix */
576   Mat          B    = aij->B; /* offdiagonal part of the matrix */
577   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
578   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
579   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
580   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
581   PetscInt    *ailen = a->ilen, *aj = a->j;
582   PetscInt    *bilen = b->ilen, *bj = b->j;
583   PetscInt     am          = aij->A->rmap->n, j;
584   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
585   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
586   PetscScalar *aa = a->a, *ba = b->a;
587 
588   PetscFunctionBegin;
589   /* Iterate over all rows of the matrix */
590   for (j = 0; j < am; j++) {
591     dnz_row = onz_row = 0;
592     rowstart_offd     = full_offd_i[j];
593     rowstart_diag     = full_diag_i[j];
594     /*  Iterate over all non-zero columns of the current row */
595     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
596       /* If column is in the diagonal */
597       if (mat_j[col] >= cstart && mat_j[col] < cend) {
598         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
599         aa[rowstart_diag + dnz_row] = mat_a[col];
600         dnz_row++;
601       } else { /* off-diagonal entries */
602         bj[rowstart_offd + onz_row] = mat_j[col];
603         ba[rowstart_offd + onz_row] = mat_a[col];
604         onz_row++;
605       }
606     }
607     ailen[j] = dnz_row;
608     bilen[j] = onz_row;
609   }
610   PetscFunctionReturn(0);
611 }
612 
/* MatGetValues() for MPIAIJ: fetch the m x n block of values at global rows idxm[] and
   columns idxn[] into v (row-major).  Only locally owned rows are supported; negative
   indices yield untouched entries.  Off-diagonal columns are translated through the
   colmap; columns with no stored entry in B return 0.0. */
PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* column in the diagonal block: shift to its local numbering */
          col = idxn[j] - cstart;
          PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          /* off-diagonal column: map the global id to B's local numbering */
          if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(aij->colmap, idxn[j] + 1, &col));
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* col < 0 (absent) or a stale colmap entry means no stored value: report 0 */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
          else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
646 
647 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode) {
648   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
649   PetscInt    nstash, reallocs;
650 
651   PetscFunctionBegin;
652   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
653 
654   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
655   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
656   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
657   PetscFunctionReturn(0);
658 }
659 
/* Finish assembly: drain the stash of off-process entries into the local blocks,
   assemble the diagonal and off-diagonal blocks, handle collective disassembly,
   and (on final assembly) set up the scatter used by matrix-vector products. */
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode) {
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* receive stashed entries from other processes until the stash is exhausted */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    /* collective: all ranks must agree on whether B uses local or global column ids */
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  /* drop cached row-access workspace; it is rebuilt on demand */
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  /* the cached diagonal vector is stale after assembly */
  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}
737 
738 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) {
739   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
740 
741   PetscFunctionBegin;
742   PetscCall(MatZeroEntries(l->A));
743   PetscCall(MatZeroEntries(l->B));
744   PetscFunctionReturn(0);
745 }
746 
/*
   Zeros the given global rows (which may be owned by any rank), optionally
   placing `diag` on the diagonal of each zeroed row and adjusting b so that
   the zeroed unknowns keep the values given in x.  Collective.
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) {
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)A->data;
  PetscObjectState sA, sB; /* nonzero states of both blocks before zeroing, to detect pattern changes */
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    /* b_i = diag * x_i for each locally zeroed row i */
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* congruent layouts: every zeroed row's diagonal entry lies in the diagonal block */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    nnwA, nnwB; /* saved 'nonew' flags, restored after the diagonal insertions */
    PetscBool   nnzA, nnzB; /* keepnonzeropattern flags of the two blocks */

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue; /* rectangular case: no diagonal entry exists beyond the last column */
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}
819 
820 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) {
821   Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
822   PetscMPIInt        n = A->rmap->n;
823   PetscInt           i, j, r, m, len = 0;
824   PetscInt          *lrows, *owners = A->rmap->range;
825   PetscMPIInt        p = 0;
826   PetscSFNode       *rrows;
827   PetscSF            sf;
828   const PetscScalar *xx;
829   PetscScalar       *bb, *mask, *aij_a;
830   Vec                xmask, lmask;
831   Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
832   const PetscInt    *aj, *ii, *ridx;
833   PetscScalar       *aa;
834 
835   PetscFunctionBegin;
836   /* Create SF where leaves are input rows and roots are owned rows */
837   PetscCall(PetscMalloc1(n, &lrows));
838   for (r = 0; r < n; ++r) lrows[r] = -1;
839   PetscCall(PetscMalloc1(N, &rrows));
840   for (r = 0; r < N; ++r) {
841     const PetscInt idx = rows[r];
842     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
843     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
844       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
845     }
846     rrows[r].rank  = p;
847     rrows[r].index = rows[r] - owners[p];
848   }
849   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
850   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
851   /* Collect flags for rows to be zeroed */
852   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
853   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
854   PetscCall(PetscSFDestroy(&sf));
855   /* Compress and put in row numbers */
856   for (r = 0; r < n; ++r)
857     if (lrows[r] >= 0) lrows[len++] = r;
858   /* zero diagonal part of matrix */
859   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
860   /* handle off diagonal part of matrix */
861   PetscCall(MatCreateVecs(A, &xmask, NULL));
862   PetscCall(VecDuplicate(l->lvec, &lmask));
863   PetscCall(VecGetArray(xmask, &bb));
864   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
865   PetscCall(VecRestoreArray(xmask, &bb));
866   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
867   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
868   PetscCall(VecDestroy(&xmask));
869   if (x && b) { /* this code is buggy when the row and column layout don't match */
870     PetscBool cong;
871 
872     PetscCall(MatHasCongruentLayouts(A, &cong));
873     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
874     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
875     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
876     PetscCall(VecGetArrayRead(l->lvec, &xx));
877     PetscCall(VecGetArray(b, &bb));
878   }
879   PetscCall(VecGetArray(lmask, &mask));
880   /* remove zeroed rows of off diagonal matrix */
881   PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
882   ii = aij->i;
883   for (i = 0; i < len; i++) PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - ii[lrows[i]]));
884   /* loop over all elements of off process part of matrix zeroing removed columns*/
885   if (aij->compressedrow.use) {
886     m    = aij->compressedrow.nrows;
887     ii   = aij->compressedrow.i;
888     ridx = aij->compressedrow.rindex;
889     for (i = 0; i < m; i++) {
890       n  = ii[i + 1] - ii[i];
891       aj = aij->j + ii[i];
892       aa = aij_a + ii[i];
893 
894       for (j = 0; j < n; j++) {
895         if (PetscAbsScalar(mask[*aj])) {
896           if (b) bb[*ridx] -= *aa * xx[*aj];
897           *aa = 0.0;
898         }
899         aa++;
900         aj++;
901       }
902       ridx++;
903     }
904   } else { /* do not use compressed row format */
905     m = l->B->rmap->n;
906     for (i = 0; i < m; i++) {
907       n  = ii[i + 1] - ii[i];
908       aj = aij->j + ii[i];
909       aa = aij_a + ii[i];
910       for (j = 0; j < n; j++) {
911         if (PetscAbsScalar(mask[*aj])) {
912           if (b) bb[i] -= *aa * xx[*aj];
913           *aa = 0.0;
914         }
915         aa++;
916         aj++;
917       }
918     }
919   }
920   if (x && b) {
921     PetscCall(VecRestoreArray(b, &bb));
922     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
923   }
924   PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
925   PetscCall(VecRestoreArray(lmask, &mask));
926   PetscCall(VecDestroy(&lmask));
927   PetscCall(PetscFree(lrows));
928 
929   /* only change matrix nonzero state if pattern was allowed to be changed */
930   if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) {
931     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
932     PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
933   }
934   PetscFunctionReturn(0);
935 }
936 
937 PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy) {
938   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
939   PetscInt    nt;
940   VecScatter  Mvctx = a->Mvctx;
941 
942   PetscFunctionBegin;
943   PetscCall(VecGetLocalSize(xx, &nt));
944   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
945   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
946   PetscUseTypeMethod(a->A, mult, xx, yy);
947   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
948   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
949   PetscFunctionReturn(0);
950 }
951 
952 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) {
953   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
954 
955   PetscFunctionBegin;
956   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
957   PetscFunctionReturn(0);
958 }
959 
960 PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) {
961   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
962   VecScatter  Mvctx = a->Mvctx;
963 
964   PetscFunctionBegin;
965   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
966   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
967   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
968   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
969   PetscFunctionReturn(0);
970 }
971 
972 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) {
973   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
974 
975   PetscFunctionBegin;
976   /* do nondiagonal part */
977   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
978   /* do local part */
979   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
980   /* add partial results together */
981   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
982   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
983   PetscFunctionReturn(0);
984 }
985 
/*
   Tests whether Bmat equals the transpose of Amat to tolerance tol.
   Cheap local test on the diagonal blocks first; only if that passes
   everywhere are the off-diagonal parts extracted and compared.
   Collective.  Result in *f on all ranks.
*/
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) {
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij  = (Mat_MPIAIJ *)Amat->data, *Bij;
  Mat         Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme;
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *)Bmat->data;
  Bdia = Bij->A;
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  /* all ranks must agree their diagonal blocks pass before doing the expensive part */
  PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  /* NOTE(review): notme is allocated with N - last + first entries (N = global
     columns) but the second fill loop runs rows last..M-1; this is consistent
     only when M == N — confirm callers only use this on square matrices. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  /* extract A(Me, Notme) and B(Notme, Me); they must be mutual transposes */
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(0);
}
1026 
1027 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f) {
1028   PetscFunctionBegin;
1029   PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f));
1030   PetscFunctionReturn(0);
1031 }
1032 
1033 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) {
1034   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1035 
1036   PetscFunctionBegin;
1037   /* do nondiagonal part */
1038   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1039   /* do local part */
1040   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1041   /* add partial results together */
1042   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1043   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1044   PetscFunctionReturn(0);
1045 }
1046 
1047 /*
1048   This only works correctly for square matrices where the subblock A->A is the
1049    diagonal block
1050 */
1051 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v) {
1052   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1053 
1054   PetscFunctionBegin;
1055   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1056   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1057   PetscCall(MatGetDiagonal(a->A, v));
1058   PetscFunctionReturn(0);
1059 }
1060 
1061 PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa) {
1062   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1063 
1064   PetscFunctionBegin;
1065   PetscCall(MatScale(a->A, aa));
1066   PetscCall(MatScale(a->B, aa));
1067   PetscFunctionReturn(0);
1068 }
1069 
/* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  /* star forest used to communicate remote COO entries */
  PetscCall(PetscSFDestroy(&aij->coo_sf));
  /* pass-1 permutation/jmap arrays for the diagonal (A) and off-diagonal (B) blocks */
  PetscCall(PetscFree(aij->Aperm1));
  PetscCall(PetscFree(aij->Bperm1));
  PetscCall(PetscFree(aij->Ajmap1));
  PetscCall(PetscFree(aij->Bjmap1));

  /* pass-2 (received entries) index maps and permutations */
  PetscCall(PetscFree(aij->Aimap2));
  PetscCall(PetscFree(aij->Bimap2));
  PetscCall(PetscFree(aij->Aperm2));
  PetscCall(PetscFree(aij->Bperm2));
  PetscCall(PetscFree(aij->Ajmap2));
  PetscCall(PetscFree(aij->Bjmap2));

  /* communication buffers (allocated together, freed together) and send permutation */
  PetscCall(PetscFree2(aij->sendbuf, aij->recvbuf));
  PetscCall(PetscFree(aij->Cperm1));
  PetscFunctionReturn(0);
}
1092 
1093 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) {
1094   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1095 
1096   PetscFunctionBegin;
1097 #if defined(PETSC_USE_LOG)
1098   PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
1099 #endif
1100   PetscCall(MatStashDestroy_Private(&mat->stash));
1101   PetscCall(VecDestroy(&aij->diag));
1102   PetscCall(MatDestroy(&aij->A));
1103   PetscCall(MatDestroy(&aij->B));
1104 #if defined(PETSC_USE_CTABLE)
1105   PetscCall(PetscTableDestroy(&aij->colmap));
1106 #else
1107   PetscCall(PetscFree(aij->colmap));
1108 #endif
1109   PetscCall(PetscFree(aij->garray));
1110   PetscCall(VecDestroy(&aij->lvec));
1111   PetscCall(VecScatterDestroy(&aij->Mvctx));
1112   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
1113   PetscCall(PetscFree(aij->ld));
1114 
1115   /* Free COO */
1116   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
1117 
1118   PetscCall(PetscFree(mat->data));
1119 
1120   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1121   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
1122 
1123   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
1124   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
1125   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
1126   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
1127   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
1128   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
1129   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
1130   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
1131   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
1132   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
1133 #if defined(PETSC_HAVE_CUDA)
1134   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
1135 #endif
1136 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1137   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
1138 #endif
1139   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
1140 #if defined(PETSC_HAVE_ELEMENTAL)
1141   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
1142 #endif
1143 #if defined(PETSC_HAVE_SCALAPACK)
1144   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
1145 #endif
1146 #if defined(PETSC_HAVE_HYPRE)
1147   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
1148   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
1149 #endif
1150   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
1151   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
1152   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
1153   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
1154   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
1155   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
1156 #if defined(PETSC_HAVE_MKL_SPARSE)
1157   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
1158 #endif
1159   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
1160   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
1161   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
1162   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
1163   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
1164   PetscFunctionReturn(0);
1165 }
1166 
1167 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer) {
1168   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1169   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1170   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1171   const PetscInt    *garray = aij->garray;
1172   const PetscScalar *aa, *ba;
1173   PetscInt           header[4], M, N, m, rs, cs, nz, cnt, i, ja, jb;
1174   PetscInt          *rowlens;
1175   PetscInt          *colidxs;
1176   PetscScalar       *matvals;
1177 
1178   PetscFunctionBegin;
1179   PetscCall(PetscViewerSetUp(viewer));
1180 
1181   M  = mat->rmap->N;
1182   N  = mat->cmap->N;
1183   m  = mat->rmap->n;
1184   rs = mat->rmap->rstart;
1185   cs = mat->cmap->rstart;
1186   nz = A->nz + B->nz;
1187 
1188   /* write matrix header */
1189   header[0] = MAT_FILE_CLASSID;
1190   header[1] = M;
1191   header[2] = N;
1192   header[3] = nz;
1193   PetscCallMPI(MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1194   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1195 
1196   /* fill in and store row lengths  */
1197   PetscCall(PetscMalloc1(m, &rowlens));
1198   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1199   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1200   PetscCall(PetscFree(rowlens));
1201 
1202   /* fill in and store column indices */
1203   PetscCall(PetscMalloc1(nz, &colidxs));
1204   for (cnt = 0, i = 0; i < m; i++) {
1205     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1206       if (garray[B->j[jb]] > cs) break;
1207       colidxs[cnt++] = garray[B->j[jb]];
1208     }
1209     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1210     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1211   }
1212   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1213   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1214   PetscCall(PetscFree(colidxs));
1215 
1216   /* fill in and store nonzero values */
1217   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1218   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1219   PetscCall(PetscMalloc1(nz, &matvals));
1220   for (cnt = 0, i = 0; i < m; i++) {
1221     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1222       if (garray[B->j[jb]] > cs) break;
1223       matvals[cnt++] = ba[jb];
1224     }
1225     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1226     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1227   }
1228   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1229   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1230   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1231   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1232   PetscCall(PetscFree(matvals));
1233 
1234   /* write block size option to the viewer's .info file */
1235   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1236   PetscFunctionReturn(0);
1237 }
1238 
1239 #include <petscdraw.h>
/*
   Viewer dispatch for MPIAIJ matrices: special-cases several ASCII formats
   (load balance, info, detailed info) and binary; otherwise gathers the
   whole matrix onto rank 0 and views it there (used for ASCII matrix dumps,
   draw, and socket viewers).  Collective.
*/
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) {
  Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt       rank = aij->rank, size = aij->size;
  PetscBool         isdraw, iascii, isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max local nonzero counts across all ranks */
      PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < (PetscInt)size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(0);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank summary: local row count, nonzeros, memory, I-node usage */
      MatInfo   info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
    /* other ASCII formats fall through to the gather-on-rank-0 path below */
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    /* NOTE(review): unreachable — an ASCII viewer always takes the first `if (iascii)` branch above */
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow, iscol;

    /* rank 0 requests all rows/columns; other ranks request none */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
    /*  The commented code uses MatCreateSubMatrices instead */
    /*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
    if (rank == 0) {
       PetscCall(PetscObjectReference((PetscObject)AA[0]));
       A    = AA[0];
       Av   = AA[0];
    }
    PetscCall(MatDestroySubMatrices(1,&AA));
*/
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqAIJ(Av, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}
1364 
1365 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) {
1366   PetscBool iascii, isdraw, issocket, isbinary;
1367 
1368   PetscFunctionBegin;
1369   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1370   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1371   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1372   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1373   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1374   PetscFunctionReturn(0);
1375 }
1376 
/*
  MatSOR_MPIAIJ - SOR/relaxation for MPIAIJ matrices.

  Only the *local* sweep variants (and Eisenstat) are supported: each process
  relaxes its own diagonal block mat->A, and the off-diagonal block mat->B
  contributes only through an updated right-hand side bb1 = bb - B*x formed
  from ghosted values of xx.  True parallel SOR is rejected (final SETERRQ).

  matin  - the matrix; bb - right-hand side; omega - relaxation factor
  flag   - bitmask of MatSORType options
  fshift - diagonal shift; its - outer iterations; lits - local iterations
  xx     - iterate: input (initial guess unless SOR_ZERO_INITIAL_GUESS) and output
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) {
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec         bb1 = NULL; /* work vector for the modified right-hand side */
  PetscBool   hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    /* triangular application only: delegate entirely to the diagonal block */
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(0);
  }

  /* bb1 is needed whenever more than one outer iteration runs, the initial guess is
     nonzero, or Eisenstat is requested.  "~flag & SOR_ZERO_INITIAL_GUESS" tests that
     the zero-initial-guess bit is NOT set (~ binds tighter than &). */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep needs no ghost update: x starts at zero, so B*x = 0 */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      /* gather ghost values of xx needed by the off-diagonal block */
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    /* backward local solve with zero initial guess */
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    if (!mat->diag) {
      /* lazily cache the matrix diagonal for the pointwise multiply below */
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    /* bb1 = bb + ((omega-2)/omega) * D*xx */
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  /* propagate any factorization error flagged in the diagonal block */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}
1472 
/*
  MatPermute_MPIAIJ - builds the permuted matrix B from A and the row/column
  permutations rowp/colp.

  The permutations give, per local row/column, the desired destination; a PetscSF
  reduce inverts each permutation (rdest/cdest = where my rows/columns land), a
  broadcast maps the compressed off-diagonal column indices (gcols) to their
  permuted global positions (gcdest), and the destination matrix is preallocated
  exactly by counting, per row, how many destination columns are diagonal vs
  off-diagonal for the destination row's owner.
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B) {
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL; /* NOTE(review): never set in this revision, so the ISDestroy(&colp) at the end is dead code — confirm against upstream */
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  /* work is sized for both row- and column-length passes */
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* Count, per local row, destination diagonal (dnnz) and off-diagonal (onnz) entries */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* Ship the counts to the processes that own the destination rows */
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(0);
}
1577 
1578 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) {
1579   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1580 
1581   PetscFunctionBegin;
1582   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1583   if (ghosts) *ghosts = aij->garray;
1584   PetscFunctionReturn(0);
1585 }
1586 
1587 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) {
1588   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1589   Mat            A = mat->A, B = mat->B;
1590   PetscLogDouble isend[5], irecv[5];
1591 
1592   PetscFunctionBegin;
1593   info->block_size = 1.0;
1594   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1595 
1596   isend[0] = info->nz_used;
1597   isend[1] = info->nz_allocated;
1598   isend[2] = info->nz_unneeded;
1599   isend[3] = info->memory;
1600   isend[4] = info->mallocs;
1601 
1602   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1603 
1604   isend[0] += info->nz_used;
1605   isend[1] += info->nz_allocated;
1606   isend[2] += info->nz_unneeded;
1607   isend[3] += info->memory;
1608   isend[4] += info->mallocs;
1609   if (flag == MAT_LOCAL) {
1610     info->nz_used      = isend[0];
1611     info->nz_allocated = isend[1];
1612     info->nz_unneeded  = isend[2];
1613     info->memory       = isend[3];
1614     info->mallocs      = isend[4];
1615   } else if (flag == MAT_GLOBAL_MAX) {
1616     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1617 
1618     info->nz_used      = irecv[0];
1619     info->nz_allocated = irecv[1];
1620     info->nz_unneeded  = irecv[2];
1621     info->memory       = irecv[3];
1622     info->mallocs      = irecv[4];
1623   } else if (flag == MAT_GLOBAL_SUM) {
1624     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1625 
1626     info->nz_used      = irecv[0];
1627     info->nz_allocated = irecv[1];
1628     info->nz_unneeded  = irecv[2];
1629     info->memory       = irecv[3];
1630     info->mallocs      = irecv[4];
1631   }
1632   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1633   info->fill_ratio_needed = 0;
1634   info->factor_mallocs    = 0;
1635   PetscFunctionReturn(0);
1636 }
1637 
/*
  MatSetOption_MPIAIJ - applies a matrix option, forwarding to the sequential
  diagonal (a->A) and off-diagonal (a->B) blocks where that is meaningful.
  Options fully handled by the generic MatSetOption() front end are accepted
  and do nothing here; unrecognized options raise PETSC_ERR_SUP.
*/
PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* options that simply propagate to both sequential blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    /* also recorded locally so MatSetValues on the parallel matrix honors it */
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); break;
  case MAT_IGNORE_OFF_PROC_ENTRIES: a->donotstash = flg; break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  case MAT_SUBMAT_SINGLEIS: A->submat_singleis = flg; break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(0);
}
1683 
/*
  MatGetRow_MPIAIJ - returns one locally owned row with globally numbered,
  increasing column indices.

  The row is assembled by merging the diagonal-block row (local columns shifted
  by cstart) with the off-diagonal-block row (compressed columns mapped through
  mat->garray).  Returned idx/v point into per-matrix work buffers
  (mat->rowindices / mat->rowvalues), sized once for the longest local row; they
  stay valid until MatRestoreRow_MPIAIJ() and only one row may be outstanding at
  a time (mat->getrowactive guard).
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
  PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    max = 1, tmp;
    for (i = 0; i < matin->rmap->n; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  /* pass NULL for whichever outputs the caller did not request */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1; /* number of leading B entries whose global column < cstart */
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already computed by the values pass above */
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(0);
}
1766 
1767 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
1768   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1769 
1770   PetscFunctionBegin;
1771   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1772   aij->getrowactive = PETSC_FALSE;
1773   PetscFunctionReturn(0);
1774 }
1775 
/*
  MatNorm_MPIAIJ - computes the Frobenius, one- or infinity-norm of the matrix.
  NORM_2 is not supported.  On one process the diagonal block holds the whole
  matrix and MatNorm() is delegated; otherwise the norm is accumulated directly
  from the raw nonzero arrays of both blocks and combined with an Allreduce.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) {
  Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
  PetscInt         i, j, cstart = mat->cmap->rstart;
  PetscReal        sum = 0.0;
  const MatScalar *v, *amata, *bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    PetscCall(MatNorm(aij->A, type, norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both blocks, reduce, then take the square root */
      v = amata;
      for (i = 0; i < amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v = bmata;
      for (i = 0; i < bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      /* tmp holds per-global-column absolute sums; summed across ranks into tmp2 */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = aij->garray;
      PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
      *norm = 0.0;
      v     = amata;
      jj    = amat->j;
      for (j = 0; j < amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); /* diagonal block: shift local column by cstart */
        v++;
      }
      v  = bmata;
      jj = bmat->j;
      for (j = 0; j < bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); /* off-diagonal block: map compressed column to global */
        v++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are wholly local, so only a MAX reduction over per-rank row sums is needed */
      PetscReal ntemp = 0.0;
      for (j = 0; j < aij->A->rmap->n; j++) {
        v   = amata + amat->i[j];
        sum = 0.0;
        for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        v = bmata + bmat->i[j];
        for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
  }
  PetscFunctionReturn(0);
}
1852 
/*
  MatTranspose_MPIAIJ - forms the transpose of a parallel AIJ matrix.

  For MAT_INITIAL_MATRIX (or in-place) the result is created with exact
  preallocation: d_nnz counts come from the diagonal block's column counts and
  o_nnz from reducing the off-diagonal block's per-ghost-column counts back to
  the owning processes via a PetscSF.  The diagonal block is then transposed
  locally (fast path, no MatSetValues), while the off-diagonal part is inserted
  entry-by-entry with global indices and completed by matrix assembly.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) {
  Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
  Mat              B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  ma = A->rmap->n;
  na = A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt            *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode         *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    /* B has A's column layout as rows and vice versa; block sizes swap accordingly */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    B = *matout;
    /* reused pattern must already fit; any new allocation is an error */
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ *)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  /* translate compressed off-diagonal columns to global indices */
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    ncol = bi[i + 1] - bi[i];
    /* row i of a->B becomes (part of) column "row" of B: insert as an ncol x 1 block */
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    pbv += ncol;
    cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place transpose: B's contents replace A's under A's header */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(0);
}
1945 
/*
  MatDiagonalScale_MPIAIJ - computes mat = diag(ll) * mat * diag(rr).
  The scatter of rr into the ghost vector is started first so the left scaling
  of the off-diagonal block and the full scaling of the diagonal block overlap
  the communication; the right scaling of the off-diagonal block waits on the
  scatter.  Do not reorder these calls.
*/
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         a = aij->A, b = aij->B;
  PetscInt    s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    /* left scaling of the off-diagonal block needs no ghost values */
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale  the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
  }
  PetscFunctionReturn(0);
}
1974 
1975 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) {
1976   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1977 
1978   PetscFunctionBegin;
1979   PetscCall(MatSetUnfactored(a->A));
1980   PetscFunctionReturn(0);
1981 }
1982 
1983 PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) {
1984   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
1985   Mat         a, b, c, d;
1986   PetscBool   flg;
1987 
1988   PetscFunctionBegin;
1989   a = matA->A;
1990   b = matA->B;
1991   c = matB->A;
1992   d = matB->B;
1993 
1994   PetscCall(MatEqual(a, c, &flg));
1995   if (flg) PetscCall(MatEqual(b, d, &flg));
1996   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1997   PetscFunctionReturn(0);
1998 }
1999 
2000 PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) {
2001   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2002   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2003 
2004   PetscFunctionBegin;
2005   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2006   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2007     /* because of the column compression in the off-processor part of the matrix a->B,
2008        the number of columns in a->B and b->B may be different, hence we cannot call
2009        the MatCopy() directly on the two parts. If need be, we can provide a more
2010        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2011        then copying the submatrices */
2012     PetscCall(MatCopy_Basic(A, B, str));
2013   } else {
2014     PetscCall(MatCopy(a->A, b->A, str));
2015     PetscCall(MatCopy(a->B, b->B, str));
2016   }
2017   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2018   PetscFunctionReturn(0);
2019 }
2020 
/*
  MatSetUp_MPIAIJ - sets the matrix up for use when no explicit preallocation
  was provided, by invoking the default (PETSC_DEFAULT) preallocation for both
  the diagonal and off-diagonal blocks.
*/
PetscErrorCode MatSetUp_MPIAIJ(Mat A) {
  PetscFunctionBegin;
  PetscCall(MatMPIAIJSetPreallocation(A, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL));
  PetscFunctionReturn(0);
}
2026 
2027 /*
2028    Computes the number of nonzeros per row needed for preallocation when X and Y
2029    have different nonzero structure.
2030 */
2031 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) {
2032   PetscInt i, j, k, nzx, nzy;
2033 
2034   PetscFunctionBegin;
2035   /* Set the number of nonzeros in the new matrix */
2036   for (i = 0; i < m; i++) {
2037     const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i];
2038     nzx    = xi[i + 1] - xi[i];
2039     nzy    = yi[i + 1] - yi[i];
2040     nnz[i] = 0;
2041     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2042       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2043       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2044       nnz[i]++;
2045     }
2046     for (; k < nzy; k++) nnz[i]++;
2047   }
2048   PetscFunctionReturn(0);
2049 }
2050 
2051 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2052 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) {
2053   PetscInt    m = Y->rmap->N;
2054   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2055   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2056 
2057   PetscFunctionBegin;
2058   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2059   PetscFunctionReturn(0);
2060 }
2061 
/*
  MatAXPY_MPIAIJ - computes Y = a*X + Y.
  SAME_NONZERO_PATTERN works block-wise in place; SUBSET_NONZERO_PATTERN uses
  the generic basic implementation; otherwise a new matrix B preallocated for
  the union of the two patterns is built, filled, and swapped into Y's header
  via MatHeaderMerge() so callers keep their handle.
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) {
  Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscCall(MatAXPY(yy->A, a, xx->A, str));
    PetscCall(MatAXPY(yy->B, a, xx->B, str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o;

    /* yy->A / yy->B are sequential blocks, so their rmap->N equals the local row count */
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
    PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
    /* union of the two patterns, diagonal and off-diagonal parts separately */
    PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(0);
}
2091 
2092 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2093 
2094 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) {
2095   PetscFunctionBegin;
2096   if (PetscDefined(USE_COMPLEX)) {
2097     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2098 
2099     PetscCall(MatConjugate_SeqAIJ(aij->A));
2100     PetscCall(MatConjugate_SeqAIJ(aij->B));
2101   }
2102   PetscFunctionReturn(0);
2103 }
2104 
2105 PetscErrorCode MatRealPart_MPIAIJ(Mat A) {
2106   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2107 
2108   PetscFunctionBegin;
2109   PetscCall(MatRealPart(a->A));
2110   PetscCall(MatRealPart(a->B));
2111   PetscFunctionReturn(0);
2112 }
2113 
2114 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) {
2115   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2116 
2117   PetscFunctionBegin;
2118   PetscCall(MatImaginaryPart(a->A));
2119   PetscCall(MatImaginaryPart(a->B));
2120   PetscFunctionReturn(0);
2121 }
2122 
2123 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) {
2124   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2125   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2126   PetscScalar       *va, *vv;
2127   Vec                vB, vA;
2128   const PetscScalar *vb;
2129 
2130   PetscFunctionBegin;
2131   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
2132   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2133 
2134   PetscCall(VecGetArrayWrite(vA, &va));
2135   if (idx) {
2136     for (i = 0; i < m; i++) {
2137       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2138     }
2139   }
2140 
2141   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
2142   PetscCall(PetscMalloc1(m, &idxb));
2143   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2144 
2145   PetscCall(VecGetArrayWrite(v, &vv));
2146   PetscCall(VecGetArrayRead(vB, &vb));
2147   for (i = 0; i < m; i++) {
2148     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2149       vv[i] = vb[i];
2150       if (idx) idx[i] = a->garray[idxb[i]];
2151     } else {
2152       vv[i] = va[i];
2153       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2154     }
2155   }
2156   PetscCall(VecRestoreArrayWrite(vA, &vv));
2157   PetscCall(VecRestoreArrayWrite(vA, &va));
2158   PetscCall(VecRestoreArrayRead(vB, &vb));
2159   PetscCall(PetscFree(idxb));
2160   PetscCall(VecDestroy(&vA));
2161   PetscCall(VecDestroy(&vB));
2162   PetscFunctionReturn(0);
2163 }
2164 
/* For each local row, compute the entry of smallest absolute value and
   (optionally) its global column index.

   v   - vector (local length = number of local rows) receiving min |a_ij| per row
   idx - optional array receiving the global column of each minimum

   Implicit zeros matter for a minimum: any global column with no stored
   entry in a row contributes a 0.0 candidate, so for the off-diagonal block
   the code searches the compressed column map (mat->garray) of mat->B for
   the first "hole" and treats it as an implicit 0.0 at that global column. */
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) {
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything lives in the diagonal block: delegate directly, writing into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* no local columns at all: every row's min |a_ij| is the implicit 0.0 */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = 0.0;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so the row has an implicit 0.0 and the min |a_ij| is at most 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* now compare against the explicit off-diagonal entries of this row */
    for (j = 0; j < ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge per-row candidates; ties prefer the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2279 
/* For each local row, compute the minimum entry (compared by real part) and
   (optionally) its global column index.

   v   - vector (local length = number of local rows) receiving the per-row minimum
   idx - optional array receiving the global column of each minimum

   As in MatGetRowMinAbs_MPIAIJ, an unstored global column is an implicit 0.0
   candidate, located by searching for the first hole in the compressed
   column map (mat->garray) of the off-diagonal block mat->B. */
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) {
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything lives in the diagonal block: delegate directly, writing into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* no local columns: report the identity of min (PETSC_MAX_REAL) and no index */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so the row has an implicit 0.0 candidate for the minimum */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* now compare against the explicit off-diagonal entries of this row */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge per-row candidates; ties prefer the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2394 
/* For each local row, compute the maximum entry (compared by real part) and
   (optionally) its global column index.

   v   - vector (local length = number of local rows) receiving the per-row maximum
   idx - optional array receiving the global column of each maximum

   An unstored global column is an implicit 0.0 candidate for the maximum,
   located by searching for the first hole in the compressed column map
   (mat->garray) of the off-diagonal block mat->B. */
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) {
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything lives in the diagonal block: delegate directly, writing into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* no local columns: report the identity of max (PETSC_MIN_REAL) and no index */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* now compare against the explicit off-diagonal entries of this row */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge per-row candidates; ties prefer the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2509 
2510 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) {
2511   Mat *dummy;
2512 
2513   PetscFunctionBegin;
2514   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2515   *newmat = *dummy;
2516   PetscCall(PetscFree(dummy));
2517   PetscFunctionReturn(0);
2518 }
2519 
2520 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) {
2521   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2522 
2523   PetscFunctionBegin;
2524   PetscCall(MatInvertBlockDiagonal(a->A, values));
2525   A->factorerrortype = a->A->factorerrortype;
2526   PetscFunctionReturn(0);
2527 }
2528 
2529 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) {
2530   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2531 
2532   PetscFunctionBegin;
2533   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2534   PetscCall(MatSetRandom(aij->A, rctx));
2535   if (x->assembled) {
2536     PetscCall(MatSetRandom(aij->B, rctx));
2537   } else {
2538     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2539   }
2540   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2541   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2542   PetscFunctionReturn(0);
2543 }
2544 
2545 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) {
2546   PetscFunctionBegin;
2547   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2548   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2549   PetscFunctionReturn(0);
2550 }
2551 
2552 /*@
2553    MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2554 
2555    Not collective
2556 
2557    Input Parameter:
2558 .    A - the matrix
2559 
2560    Output Parameter:
2561 .    nz - the number of nonzeros
2562 
2563  Level: advanced
2564 
2565 .seealso: `MATMPIAIJ`, `Mat`
2566 @*/
2567 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) {
2568   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2569   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2570 
2571   PetscFunctionBegin;
2572   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2573   PetscFunctionReturn(0);
2574 }
2575 
2576 /*@
2577    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2578 
2579    Collective on A
2580 
2581    Input Parameters:
2582 +    A - the matrix
2583 -    sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)
2584 
2585  Level: advanced
2586 
2587 @*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) {
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation if one is composed on A;
     silently a no-op for matrix types that do not provide it */
  PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
  PetscFunctionReturn(0);
}
2593 
2594 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject) {
2595   PetscBool sc = PETSC_FALSE, flg;
2596 
2597   PetscFunctionBegin;
2598   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2599   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2600   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2601   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2602   PetscOptionsHeadEnd();
2603   PetscFunctionReturn(0);
2604 }
2605 
2606 PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) {
2607   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
2608   Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;
2609 
2610   PetscFunctionBegin;
2611   if (!Y->preallocated) {
2612     PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
2613   } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
2614     PetscInt nonew = aij->nonew;
2615     PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
2616     aij->nonew = nonew;
2617   }
2618   PetscCall(MatShift_Basic(Y, a));
2619   PetscFunctionReturn(0);
2620 }
2621 
2622 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) {
2623   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2624 
2625   PetscFunctionBegin;
2626   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2627   PetscCall(MatMissingDiagonal(a->A, missing, d));
2628   if (d) {
2629     PetscInt rstart;
2630     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2631     *d += rstart;
2632   }
2633   PetscFunctionReturn(0);
2634 }
2635 
2636 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) {
2637   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2638 
2639   PetscFunctionBegin;
2640   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2641   PetscFunctionReturn(0);
2642 }
2643 
2644 /* -------------------------------------------------------------------*/
/* Operation table for MATMPIAIJ. Entries are strictly positional — the
   numbered comments index slots of struct _MatOps — so entries must never be
   reordered; NULL means the operation is not implemented for this type. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       MatFilter_AIJ,
                                       /*150*/ NULL};
2796 
2797 /* ----------------------------------------------------------------------------------------*/
2798 
2799 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) {
2800   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2801 
2802   PetscFunctionBegin;
2803   PetscCall(MatStoreValues(aij->A));
2804   PetscCall(MatStoreValues(aij->B));
2805   PetscFunctionReturn(0);
2806 }
2807 
2808 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) {
2809   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2810 
2811   PetscFunctionBegin;
2812   PetscCall(MatRetrieveValues(aij->A));
2813   PetscCall(MatRetrieveValues(aij->B));
2814   PetscFunctionReturn(0);
2815 }
2816 
/* Preallocate the diagonal (b->A) and off-diagonal (b->B) sequential blocks
   of a MATMPIAIJ matrix. d_nz/d_nnz describe the diagonal block, o_nz/o_nnz
   the off-diagonal block; see MatMPIAIJSetPreallocation() for argument
   semantics. Marks the matrix preallocated but unassembled. */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) {
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ *)B->data;

  /* discard any cached column map and communication structures; they are
     rebuilt during the next assembly */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  /* on a single process there is no off-diagonal part, so b->B gets zero columns */
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));

  /* the diagonal block is created only once, on the first preallocation */
  if (!B->preallocated) {
    PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
    PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
    PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
    PetscCall(MatSetType(b->A, MATSEQAIJ));
  }

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2857 
/* Reset the matrix to its just-preallocated state, keeping the existing
   preallocation of both blocks but discarding assembly-time data structures
   (column map, ghost vector, scatter). The matrix becomes unassembled. */
PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) {
  Mat_MPIAIJ *b;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ *)B->data;

  /* drop cached column map and communication structures; rebuilt at next assembly */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2883 
2884 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) {
2885   Mat         mat;
2886   Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;
2887 
2888   PetscFunctionBegin;
2889   *newmat = NULL;
2890   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2891   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2892   PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
2893   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2894   a = (Mat_MPIAIJ *)mat->data;
2895 
2896   mat->factortype   = matin->factortype;
2897   mat->assembled    = matin->assembled;
2898   mat->insertmode   = NOT_SET_VALUES;
2899   mat->preallocated = matin->preallocated;
2900 
2901   a->size         = oldmat->size;
2902   a->rank         = oldmat->rank;
2903   a->donotstash   = oldmat->donotstash;
2904   a->roworiented  = oldmat->roworiented;
2905   a->rowindices   = NULL;
2906   a->rowvalues    = NULL;
2907   a->getrowactive = PETSC_FALSE;
2908 
2909   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
2910   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2911 
2912   if (oldmat->colmap) {
2913 #if defined(PETSC_USE_CTABLE)
2914     PetscCall(PetscTableCreateCopy(oldmat->colmap, &a->colmap));
2915 #else
2916     PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
2917     PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
2918 #endif
2919   } else a->colmap = NULL;
2920   if (oldmat->garray) {
2921     PetscInt len;
2922     len = oldmat->B->cmap->n;
2923     PetscCall(PetscMalloc1(len + 1, &a->garray));
2924     if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
2925   } else a->garray = NULL;
2926 
2927   /* It may happen MatDuplicate is called with a non-assembled matrix
2928      In fact, MatDuplicate only requires the matrix to be preallocated
2929      This may happen inside a DMCreateMatrix_Shell */
2930   if (oldmat->lvec) { PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); }
2931   if (oldmat->Mvctx) { PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); }
2932   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
2933   PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
2934   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
2935   *newmat = mat;
2936   PetscFunctionReturn(0);
2937 }
2938 
2939 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) {
2940   PetscBool isbinary, ishdf5;
2941 
2942   PetscFunctionBegin;
2943   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
2944   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
2945   /* force binary viewer to load .info file if it has not yet done so */
2946   PetscCall(PetscViewerSetUp(viewer));
2947   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2948   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
2949   if (isbinary) {
2950     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
2951   } else if (ishdf5) {
2952 #if defined(PETSC_HAVE_HDF5)
2953     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
2954 #else
2955     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2956 #endif
2957   } else {
2958     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
2959   }
2960   PetscFunctionReturn(0);
2961 }
2962 
/*
   Reads an MPIAIJ matrix from a PETSc binary viewer.

   Collective: every rank must call; the PetscViewerBinaryReadAll() calls below are
   collective and each rank receives only its own row slice of the data.

   File layout (from the header read below): [classid, M, N, nz] followed by
   per-row nonzero counts, then all column indices, then all values.
*/
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) {
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  /* a negative nz marks a special (e.g. dense) on-disk format that this reader does not handle */
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  /* read the m local row lengths into rowidxs[1..m] */
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  /* in-place prefix sum: convert row lengths to CSR row offsets; rowidxs[m] = local nnz */
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  /* sanity check: local nonzero counts must sum to the global nz from the header */
  PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values; the CSR arrays are copied, so they are freed here */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(0);
}
3011 
3012 /* Not scalable because of ISAllGather() unless getting all columns. */
3013 PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) {
3014   IS          iscol_local;
3015   PetscBool   isstride;
3016   PetscMPIInt lisstride = 0, gisstride;
3017 
3018   PetscFunctionBegin;
3019   /* check if we are grabbing all columns*/
3020   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3021 
3022   if (isstride) {
3023     PetscInt start, len, mstart, mlen;
3024     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3025     PetscCall(ISGetLocalSize(iscol, &len));
3026     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3027     if (mstart == start && mlen - mstart == len) lisstride = 1;
3028   }
3029 
3030   PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3031   if (gisstride) {
3032     PetscInt N;
3033     PetscCall(MatGetSize(mat, NULL, &N));
3034     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3035     PetscCall(ISSetIdentity(iscol_local));
3036     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3037   } else {
3038     PetscInt cbs;
3039     PetscCall(ISGetBlockSize(iscol, &cbs));
3040     PetscCall(ISAllGather(iscol, &iscol_local));
3041     PetscCall(ISSetBlockSize(iscol_local, cbs));
3042   }
3043 
3044   *isseq = iscol_local;
3045   PetscFunctionReturn(0);
3046 }
3047 
3048 /*
3049  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3050  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3051 
3052  Input Parameters:
3053    mat - matrix
   isrow - parallel row index set; its local indices are a subset of local rows of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
3056    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3057            i.e., mat->cstart <= iscol[i] < mat->cend
3058  Output Parameter:
3059    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3060    iscol_o - sequential column index set for retrieving mat->B
3061    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3062  */
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[]) {
  Vec             x, cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray, *cmaparray;
  PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
  Mat             B    = a->B;
  Vec             lvec = a->lvec, lcmap;
  PetscInt        i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm        comm;
  VecScatter      Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x.
     x marks selected columns with their own global index; cmap carries the column's
     position within the (global) submatrix. Unselected entries stay at -1. */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0));
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices: exclusive prefix sum gives this rank's offset into the
     concatenated global iscol */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  /* iscol indices are assumed to lie in [cstart, cend) — see the contract in the
     comment block above this function */
  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
    idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d: local (diagonal-block) column indices; idx ownership transfers to the IS */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i)); /* i is reused here to hold the block size */
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d: row indices shifted to local numbering */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices: entries whose scattered value stayed at -1 were not
     selected by any rank; entries > -1 carry the selected column's submatrix index */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  for (i = 0; i < Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  /* ownership of cmap1 passes to the caller via *garray; caller must PetscFree() it */
  *garray = cmap1;

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(0);
}
3158 
/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
/*
   MAT_INITIAL_MATRIX: builds the submatrix from local sub-blocks of a->A and a->B and
   stashes the index sets on the result (via PetscObjectCompose) so that a later
   MAT_REUSE_MATRIX call can update values in place without recomputing them.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat         M = NULL;
  MPI_Comm    comm;
  IS          iscol_d, isrow_d, iscol_o;
  Mat         Asub = NULL, Bsub = NULL;
  PetscInt    n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat; they were composed onto it
       by the MAT_INITIAL_MATRIX branch below */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    /* skip the off-diagonal update when no off-process columns were selected */
    if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));

    /* Create submatrix M; Asub and Bsub are consumed by this call and must not be used afterwards */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ *)M->data;

    PetscCall(ISGetLocalSize(iscol_o, &BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt        i, j, *idx_new, *subgarray = asub->garray;
      PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));

      /* Merge-walk the two sorted garrays to keep only the iscol_o entries whose
         global column survived assembly (both arrays are ascending) */
      PetscCall(PetscMalloc1(n, &idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o, &idx));
      for (i = 0; i < n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o, &idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
    }

    /* garray was allocated by ISGetSeqIS_SameColDist_Private(); ownership ends here */
    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(0);
}
3250 
/*
   Top-level MPIAIJ submatrix extraction. Chooses among three strategies:
     1. both isrow and iscol match mat's row/column ownership -> SameRowColDist (cheapest)
     2. only isrow matches -> SameRowDist (requires a sorted gathered iscol)
     3. general fallback -> nonscalable path via an all-gathered iscol
   The strategy choice is a collective decision (allreduce below), so all ranks take
   the same branch of collective calls.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) {
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* On reuse, infer which strategy created *newmat from the IS objects that
       strategy composed onto it, instead of redoing the collective checks */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      /* all local indices inside [start, end) means the row IS respects mat's row layout */
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol, &i, &j));
      PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* collective AND: the optimized paths apply only if every rank qualifies */
    PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
    PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
      PetscFunctionReturn(0);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
        PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol, &i));
        PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);

        PetscCall(ISSorted(iscol_local, &sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
          PetscFunctionReturn(0);
        }
        /* if unsorted, fall through to the general path below, reusing iscol_local */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
          PetscFunctionReturn(0);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
  }

  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* stash the gathered IS on the new matrix so MAT_REUSE_MATRIX can find it */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(0);
}
3349 
3350 /*@C
3351      MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3352          and "off-diagonal" part of the matrix in CSR format.
3353 
3354    Collective
3355 
3356    Input Parameters:
3357 +  comm - MPI communicator
3358 .  A - "diagonal" portion of matrix
3359 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3360 -  garray - global index of B columns
3361 
3362    Output Parameter:
.   mat - the matrix, with input A as its local diagonal matrix

   Level: advanced
3365 
3366    Notes:
3367    See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3368 
3369    A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3370 
3371 .seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3372 @*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat) {
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat */
  PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat; ownership of A transfers to *mat */
  maij->A = A;

  /* Translate B's local column indices to global indices, in place, using garray */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew reuses B's i/j/a arrays directly */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* Hand the i/j/a arrays over from B to Bnew: clear B's ownership flags so the
     MatDestroy below does not free them ... */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  /* ... and make Bnew responsible for freeing them instead */
  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B; MAT_NO_OFF_PROC_ENTRIES avoids needless communication
     during this purely local assembly */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}
3442 
3443 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3444 
3445 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) {
3446   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
3447   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3448   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
3449   Mat             M, Msub, B = a->B;
3450   MatScalar      *aa;
3451   Mat_SeqAIJ     *aij;
3452   PetscInt       *garray = a->garray, *colsub, Ncols;
3453   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
3454   IS              iscol_sub, iscmap;
3455   const PetscInt *is_idx, *cmap;
3456   PetscBool       allcolumns = PETSC_FALSE;
3457   MPI_Comm        comm;
3458 
3459   PetscFunctionBegin;
3460   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3461   if (call == MAT_REUSE_MATRIX) {
3462     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3463     PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
3464     PetscCall(ISGetLocalSize(iscol_sub, &count));
3465 
3466     PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
3467     PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");
3468 
3469     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
3470     PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3471 
3472     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
3473 
3474   } else { /* call == MAT_INITIAL_MATRIX) */
3475     PetscBool flg;
3476 
3477     PetscCall(ISGetLocalSize(iscol, &n));
3478     PetscCall(ISGetSize(iscol, &Ncols));
3479 
3480     /* (1) iscol -> nonscalable iscol_local */
3481     /* Check for special case: each processor gets entire matrix columns */
3482     PetscCall(ISIdentity(iscol_local, &flg));
3483     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3484     PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3485     if (allcolumns) {
3486       iscol_sub = iscol_local;
3487       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3488       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
3489 
3490     } else {
3491       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3492       PetscInt *idx, *cmap1, k;
3493       PetscCall(PetscMalloc1(Ncols, &idx));
3494       PetscCall(PetscMalloc1(Ncols, &cmap1));
3495       PetscCall(ISGetIndices(iscol_local, &is_idx));
3496       count = 0;
3497       k     = 0;
3498       for (i = 0; i < Ncols; i++) {
3499         j = is_idx[i];
3500         if (j >= cstart && j < cend) {
3501           /* diagonal part of mat */
3502           idx[count]     = j;
3503           cmap1[count++] = i; /* column index in submat */
3504         } else if (Bn) {
3505           /* off-diagonal part of mat */
3506           if (j == garray[k]) {
3507             idx[count]     = j;
3508             cmap1[count++] = i; /* column index in submat */
3509           } else if (j > garray[k]) {
3510             while (j > garray[k] && k < Bn - 1) k++;
3511             if (j == garray[k]) {
3512               idx[count]     = j;
3513               cmap1[count++] = i; /* column index in submat */
3514             }
3515           }
3516         }
3517       }
3518       PetscCall(ISRestoreIndices(iscol_local, &is_idx));
3519 
3520       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
3521       PetscCall(ISGetBlockSize(iscol, &cbs));
3522       PetscCall(ISSetBlockSize(iscol_sub, cbs));
3523 
3524       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
3525     }
3526 
3527     /* (3) Create sequential Msub */
3528     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
3529   }
3530 
3531   PetscCall(ISGetLocalSize(iscol_sub, &count));
3532   aij = (Mat_SeqAIJ *)(Msub)->data;
3533   ii  = aij->i;
3534   PetscCall(ISGetIndices(iscmap, &cmap));
3535 
3536   /*
3537       m - number of local rows
3538       Ncols - number of columns (same on all processors)
3539       rstart - first row in new global matrix generated
3540   */
3541   PetscCall(MatGetSize(Msub, &m, NULL));
3542 
3543   if (call == MAT_INITIAL_MATRIX) {
3544     /* (4) Create parallel newmat */
3545     PetscMPIInt rank, size;
3546     PetscInt    csize;
3547 
3548     PetscCallMPI(MPI_Comm_size(comm, &size));
3549     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3550 
3551     /*
3552         Determine the number of non-zeros in the diagonal and off-diagonal
3553         portions of the matrix in order to do correct preallocation
3554     */
3555 
3556     /* first get start and end of "diagonal" columns */
3557     PetscCall(ISGetLocalSize(iscol, &csize));
3558     if (csize == PETSC_DECIDE) {
3559       PetscCall(ISGetSize(isrow, &mglobal));
3560       if (mglobal == Ncols) { /* square matrix */
3561         nlocal = m;
3562       } else {
3563         nlocal = Ncols / size + ((Ncols % size) > rank);
3564       }
3565     } else {
3566       nlocal = csize;
3567     }
3568     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3569     rstart = rend - nlocal;
3570     PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);
3571 
3572     /* next, compute all the lengths */
3573     jj = aij->j;
3574     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3575     olens = dlens + m;
3576     for (i = 0; i < m; i++) {
3577       jend = ii[i + 1] - ii[i];
3578       olen = 0;
3579       dlen = 0;
3580       for (j = 0; j < jend; j++) {
3581         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3582         else dlen++;
3583         jj++;
3584       }
3585       olens[i] = olen;
3586       dlens[i] = dlen;
3587     }
3588 
3589     PetscCall(ISGetBlockSize(isrow, &bs));
3590     PetscCall(ISGetBlockSize(iscol, &cbs));
3591 
3592     PetscCall(MatCreate(comm, &M));
3593     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
3594     PetscCall(MatSetBlockSizes(M, bs, cbs));
3595     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3596     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3597     PetscCall(PetscFree(dlens));
3598 
3599   } else { /* call == MAT_REUSE_MATRIX */
3600     M = *newmat;
3601     PetscCall(MatGetLocalSize(M, &i, NULL));
3602     PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3603     PetscCall(MatZeroEntries(M));
3604     /*
3605          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3606        rather than the slower MatSetValues().
3607     */
3608     M->was_assembled = PETSC_TRUE;
3609     M->assembled     = PETSC_FALSE;
3610   }
3611 
3612   /* (5) Set values of Msub to *newmat */
3613   PetscCall(PetscMalloc1(count, &colsub));
3614   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
3615 
3616   jj = aij->j;
3617   PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
3618   for (i = 0; i < m; i++) {
3619     row = rstart + i;
3620     nz  = ii[i + 1] - ii[i];
3621     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
3622     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
3623     jj += nz;
3624     aa += nz;
3625   }
3626   PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
3627   PetscCall(ISRestoreIndices(iscmap, &cmap));
3628 
3629   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3630   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3631 
3632   PetscCall(PetscFree(colsub));
3633 
3634   /* save Msub, iscol_sub and iscmap used in processor for next request */
3635   if (call == MAT_INITIAL_MATRIX) {
3636     *newmat = M;
3637     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub));
3638     PetscCall(MatDestroy(&Msub));
3639 
3640     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub));
3641     PetscCall(ISDestroy(&iscol_sub));
3642 
3643     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap));
3644     PetscCall(ISDestroy(&iscmap));
3645 
3646     if (iscol_local) {
3647       PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local));
3648       PetscCall(ISDestroy(&iscol_local));
3649     }
3650   }
3651   PetscFunctionReturn(0);
3652 }
3653 
3654 /*
3655     Not great since it makes two copies of the submatrix, first an SeqAIJ
3656   in local and then by concatenating the local matrices the end result.
3657   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3658 
3659   This requires a sequential iscol with all indices.
3660 */
/*
    MatCreateSubMatrix_MPIAIJ_nonscalable - extracts the parallel submatrix mat[isrow, iscol]
    by first forming each process's rows as a sequential MATSEQAIJ matrix (Mreuse) and then
    inserting its entries into a new parallel matrix with MatSetValues_MPIAIJ().

    Input:
      mat    - the parallel matrix
      isrow  - rows this process contributes to the submatrix
      iscol  - sequential IS with ALL column indices this process wants
      csize  - local column size of the result, or PETSC_DECIDE
      call   - MAT_INITIAL_MATRIX, or MAT_REUSE_MATRIX to refill the previously composed "SubMatrix"
    Output:
      newmat - the assembled parallel submatrix
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) {
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* the optimization applies only if every process requests all columns */
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  /* build (or refill) the sequential piece Mreuse holding this process's requested rows */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum of local column sizes gives this process's [rstart, rend) diagonal-column range */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* dlens and olens share a single allocation */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  /* copy the entries of the sequential piece into the parallel matrix, one row at a time */
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row   = rstart + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj += nz;
    vwork = aa;
    aa += nz;
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(0);
}
3786 
/*
    MatMPIAIJSetPreallocationCSR_MPIAIJ - type-specific implementation behind
    MatMPIAIJSetPreallocationCSR(): preallocates B from the local CSR arrays (Ii, J),
    inserts the values v (if given), assembles B, and records per-row counts of
    entries left of the diagonal block in Aij->ld.

    NOTE(review): the debug range check (first/last column only) and the ld[]
    counting loop below appear to assume each row's column indices are sorted in
    increasing order — confirm against the documented input contract.
*/
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) {
  PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
  PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
  const PetscInt *JJ;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);

  /* finalize the layouts before reading local sizes and ownership ranges */
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  m      = B->rmap->n;      /* number of local rows */
  cstart = B->cmap->rstart; /* [cstart, cend) = global columns of the diagonal block */
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart; /* first global row owned by this process */

  PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));

  /* debug-build sanity checks on the CSR input */
  if (PetscDefined(USE_DEBUG)) {
    for (i = 0; i < m; i++) {
      nnz = Ii[i + 1] - Ii[i];
      JJ  = J + Ii[i];
      PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
      PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
      PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
    }
  }

  /* count, for each row, the entries falling in the diagonal (d_nnz) and off-diagonal (o_nnz) blocks */
  for (i = 0; i < m; i++) {
    nnz     = Ii[i + 1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max, nnz); /* NOTE(review): nnz_max is computed but never used below — presumably a leftover */
    d       = 0;
    for (j = 0; j < nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  /* insert the values row by row; every row here is locally owned */
  for (i = 0; i < m; i++) {
    ii = i + rstart;
    PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J + Ii[i], v ? v + Ii[i] : NULL, INSERT_VALUES));
  }
  /* no off-process entries were generated, so skip that part of assembly */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* count number of entries below block diagonal */
  PetscCall(PetscFree(Aij->ld));
  PetscCall(PetscCalloc1(m, &ld));
  Aij->ld = ld;
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];
    j   = 0;
    while (j < nnz && J[j] < cstart) j++; /* leading entries with column < cstart */
    ld[i] = j;
    J += nnz; /* advance to the next row; J is a local copy, the caller's pointer is untouched */
  }

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}
3855 
3856 /*@
3857    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3858    (the default parallel PETSc format).
3859 
3860    Collective
3861 
3862    Input Parameters:
3863 +  B - the matrix
3864 .  i - the indices into j for the start of each local row (starts with zero)
3865 .  j - the column indices for each local row (starts with zero)
3866 -  v - optional values in the matrix
3867 
3868    Level: developer
3869 
3870    Notes:
3871        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3872      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3873      called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3874 
3875        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3876 
       The format which is used for the sparse matrix input is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
    as shown
3880 
3881 $        1 0 0
3882 $        2 0 3     P0
3883 $       -------
3884 $        4 5 6     P1
3885 $
3886 $     Process0 [P0]: rows_owned=[0,1]
3887 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3888 $        j =  {0,0,2}  [size = 3]
3889 $        v =  {1,2,3}  [size = 3]
3890 $
3891 $     Process1 [P1]: rows_owned=[2]
3892 $        i =  {0,3}    [size = nrow+1  = 1+1]
3893 $        j =  {0,1,2}  [size = 3]
3894 $        v =  {4,5,6}  [size = 3]
3895 
3896 .seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
3897           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
3898 @*/
3899 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) {
3900   PetscFunctionBegin;
3901   PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
3902   PetscFunctionReturn(0);
3903 }
3904 
3905 /*@C
3906    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
3907    (the default parallel PETSc format).  For good matrix assembly performance
3908    the user should preallocate the matrix storage by setting the parameters
3909    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3910    performance can be increased by more than a factor of 50.
3911 
3912    Collective
3913 
3914    Input Parameters:
3915 +  B - the matrix
3916 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3917            (same value is used for all local rows)
3918 .  d_nnz - array containing the number of nonzeros in the various rows of the
3919            DIAGONAL portion of the local submatrix (possibly different for each row)
3920            or NULL (`PETSC_NULL_INTEGER` in Fortran), if d_nz is used to specify the nonzero structure.
3921            The size of this array is equal to the number of local rows, i.e 'm'.
3922            For matrices that will be factored, you must leave room for (and set)
3923            the diagonal entry even if it is zero.
3924 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3925            submatrix (same value is used for all local rows).
3926 -  o_nnz - array containing the number of nonzeros in the various rows of the
3927            OFF-DIAGONAL portion of the local submatrix (possibly different for
3928            each row) or NULL (`PETSC_NULL_INTEGER` in Fortran), if o_nz is used to specify the nonzero
3929            structure. The size of this array is equal to the number
3930            of local rows, i.e 'm'.
3931 
3932    If the *_nnz parameter is given then the *_nz parameter is ignored
3933 
3934    The `MATAIJ` format, also called compressed row storage (CSR)), is fully compatible with standard Fortran 77
3935    storage.  The stored row and column indices begin with zero.
3936    See [Sparse Matrices](sec_matsparse) for details.
3937 
3938    The parallel matrix is partitioned such that the first m0 rows belong to
3939    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3940    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3941 
3942    The DIAGONAL portion of the local submatrix of a processor can be defined
3943    as the submatrix which is obtained by extraction the part corresponding to
3944    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3945    first row that belongs to the processor, r2 is the last row belonging to
3946    the this processor, and c1-c2 is range of indices of the local part of a
3947    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3948    common case of a square matrix, the row and column ranges are the same and
3949    the DIAGONAL part is also square. The remaining portion of the local
3950    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3951 
3952    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3953 
3954    You can call MatGetInfo() to get information on how effective the preallocation was;
3955    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3956    You can also run with the option -info and look for messages with the string
3957    malloc in them to see if additional memory allocation was needed.
3958 
3959    Example usage:
3960 
3961    Consider the following 8x8 matrix with 34 non-zero values, that is
3962    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3963    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3964    as follows:
3965 
3966 .vb
3967             1  2  0  |  0  3  0  |  0  4
3968     Proc0   0  5  6  |  7  0  0  |  8  0
3969             9  0 10  | 11  0  0  | 12  0
3970     -------------------------------------
3971            13  0 14  | 15 16 17  |  0  0
3972     Proc1   0 18  0  | 19 20 21  |  0  0
3973             0  0  0  | 22 23  0  | 24  0
3974     -------------------------------------
3975     Proc2  25 26 27  |  0  0 28  | 29  0
3976            30  0  0  | 31 32 33  |  0 34
3977 .ve
3978 
3979    This can be represented as a collection of submatrices as:
3980 
3981 .vb
3982       A B C
3983       D E F
3984       G H I
3985 .ve
3986 
3987    Where the submatrices A,B,C are owned by proc0, D,E,F are
3988    owned by proc1, G,H,I are owned by proc2.
3989 
3990    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3991    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3992    The 'M','N' parameters are 8,8, and have the same values on all procs.
3993 
3994    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3995    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3996    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
   matrix, and [DF] as another `MATSEQAIJ` matrix.
4000 
4001    When d_nz, o_nz parameters are specified, d_nz storage elements are
4002    allocated for every row of the local diagonal submatrix, and o_nz
4003    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4006    In this case, the values of d_nz,o_nz are:
4007 .vb
4008      proc0 : dnz = 2, o_nz = 2
4009      proc1 : dnz = 3, o_nz = 2
4010      proc2 : dnz = 1, o_nz = 4
4011 .ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
4015    34 values.
4016 
4017    When d_nnz, o_nnz parameters are specified, the storage is specified
4018    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4019    In the above case the values for d_nnz,o_nnz are:
4020 .vb
4021      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4022      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4023      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4024 .ve
4025    Here the space allocated is sum of all the above values i.e 34, and
4026    hence pre-allocation is perfect.
4027 
4028    Level: intermediate
4029 
4030 .seealso: [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4031           `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
4032 @*/
/* Validates B, then dispatches to the type-specific implementation registered as
   "MatMPIAIJSetPreallocation_C"; PetscTryMethod() makes this a no-op for matrix
   types that do not provide the method. */
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(0);
}
4040 
4041 /*@
4042      MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard
4043          CSR format for the local rows.
4044 
4045    Collective
4046 
4047    Input Parameters:
4048 +  comm - MPI communicator
4049 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
4050 .  n - This value should be the same as the local size used in creating the
4051        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4052        calculated if N is given) For square matrices n is almost always m.
4053 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4054 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4055 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4056 .   j - column indices
4057 -   a - optional matrix values
4058 
4059    Output Parameter:
4060 .   mat - the matrix
4061 
4062    Level: intermediate
4063 
4064    Notes:
4065        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4066      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4067      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4068 
4069        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4070 
       The format which is used for the sparse matrix input is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
    as shown
4074 
4075        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
4076 
4077 $        1 0 0
4078 $        2 0 3     P0
4079 $       -------
4080 $        4 5 6     P1
4081 $
4082 $     Process0 [P0]: rows_owned=[0,1]
4083 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4084 $        j =  {0,0,2}  [size = 3]
4085 $        v =  {1,2,3}  [size = 3]
4086 $
4087 $     Process1 [P1]: rows_owned=[2]
4088 $        i =  {0,3}    [size = nrow+1  = 1+1]
4089 $        j =  {0,1,2}  [size = 3]
4090 $        v =  {4,5,6}  [size = 3]
4091 
.seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4094 @*/
/* Creates and assembles an MPIAIJ matrix from per-process CSR arrays (i, j, a);
   the arrays are copied, see the manual page above. */
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) {
  PetscFunctionBegin;
  /* NULL i is tolerated by this check — presumably for processes owning no rows; TODO confirm */
  PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  /* copies the CSR data into the matrix and assembles it */
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
  PetscFunctionReturn(0);
}
4106 
4107 /*@
4108      MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard
4109          CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed from `MatCreateMPIAIJWithArrays()`
4110 
4111      Deprecated: Use `MatUpdateMPIAIJWithArray()`
4112 
4113    Collective
4114 
4115    Input Parameters:
4116 +  mat - the matrix
4117 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
4118 .  n - This value should be the same as the local size used in creating the
4119        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4120        calculated if N is given) For square matrices n is almost always m.
4121 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4122 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4123 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4124 .  J - column indices
4125 -  v - matrix values
4126 
4127    Level: intermediate
4128 
4129 .seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4130           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()`
4131 @*/
/*
   Implementation: copies the new values v (laid out in the original CSR row order)
   into the diagonal (Aij->A) and off-diagonal (Aij->B) blocks. Within each row the
   stored order is [off-diag entries left of the diagonal block | diagonal-block
   entries | remaining off-diag entries], with ld[i] (computed at creation time in
   MatMPIAIJSetPreallocationCSR_MPIAIJ()) giving the size of the first group.
   Note: M, N and J are accepted for symmetry with MatCreateMPIAIJWithArrays() but
   are not referenced here — the sparsity pattern must be unchanged since creation.
*/
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) {
  PetscInt        nnz, i;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
  PetscScalar    *ad, *ao;
  PetscInt        ldi, Iii, md;
  const PetscInt *Adi = Ad->i;  /* row offsets of the diagonal block */
  PetscInt       *ld  = Aij->ld; /* per-row count of off-diag entries left of the diagonal block */

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
  PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));

  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];   /* total entries in this row */
    Iii = Ii[i];               /* offset of this row inside v */
    ldi = ld[i];               /* off-diag entries left of the diagonal block */
    md  = Adi[i + 1] - Adi[i]; /* entries belonging to the diagonal block */
    PetscCall(PetscArraycpy(ao, v + Iii, ldi));
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
    ad += md;
    ao += nnz - md;
  }
  /* all updated entries are local, so assembly needs no off-process communication */
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(0);
}
4174 
4175 /*@
4176      MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values
4177 
4178    Collective
4179 
4180    Input Parameters:
4181 +  mat - the matrix
4182 -  v - matrix values, stored by row
4183 
4184    Level: intermediate
4185 
4186    Note:
4187    The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4188 
.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4191 @*/
4192 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) {
4193   PetscInt        nnz, i, m;
4194   PetscBool       nooffprocentries;
4195   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4196   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4197   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4198   PetscScalar    *ad, *ao;
4199   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4200   PetscInt        ldi, Iii, md;
4201   PetscInt       *ld = Aij->ld;
4202 
4203   PetscFunctionBegin;
4204   m = mat->rmap->n;
4205 
4206   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4207   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4208   Iii = 0;
4209   for (i = 0; i < m; i++) {
4210     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4211     ldi = ld[i];
4212     md  = Adi[i + 1] - Adi[i];
4213     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4214     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4215     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4216     ad += md;
4217     ao += nnz - md;
4218     Iii += nnz;
4219   }
4220   nooffprocentries      = mat->nooffprocentries;
4221   mat->nooffprocentries = PETSC_TRUE;
4222   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4223   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4224   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4225   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4226   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4227   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4228   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4229   mat->nooffprocentries = nooffprocentries;
4230   PetscFunctionReturn(0);
4231 }
4232 
4233 /*@C
4234    MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4235    (the default parallel PETSc format).  For good matrix assembly performance
4236    the user should preallocate the matrix storage by setting the parameters
4237    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4238    performance can be increased by more than a factor of 50.
4239 
4240    Collective
4241 
4242    Input Parameters:
4243 +  comm - MPI communicator
4244 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4245            This value should be the same as the local size used in creating the
4246            y vector for the matrix-vector product y = Ax.
4247 .  n - This value should be the same as the local size used in creating the
4248        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4249        calculated if N is given) For square matrices n is almost always m.
4250 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4251 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4252 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4253            (same value is used for all local rows)
4254 .  d_nnz - array containing the number of nonzeros in the various rows of the
4255            DIAGONAL portion of the local submatrix (possibly different for each row)
4256            or NULL, if d_nz is used to specify the nonzero structure.
4257            The size of this array is equal to the number of local rows, i.e 'm'.
4258 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4259            submatrix (same value is used for all local rows).
4260 -  o_nnz - array containing the number of nonzeros in the various rows of the
4261            OFF-DIAGONAL portion of the local submatrix (possibly different for
4262            each row) or NULL, if o_nz is used to specify the nonzero
4263            structure. The size of this array is equal to the number
4264            of local rows, i.e 'm'.
4265 
4266    Output Parameter:
4267 .  A - the matrix
4268 
4269    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4270    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4271    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4272 
4273    Notes:
4274    If the *_nnz parameter is given then the *_nz parameter is ignored
4275 
4276    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4277    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4278    storage requirements for this matrix.
4279 
4280    If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one
4281    processor than it must be used on all processors that share the object for
4282    that argument.
4283 
4284    The user MUST specify either the local or global matrix dimensions
4285    (possibly both).
4286 
4287    The parallel matrix is partitioned across processors such that the
4288    first m0 rows belong to process 0, the next m1 rows belong to
4289    process 1, the next m2 rows belong to process 2 etc.. where
4290    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4291    values corresponding to [m x N] submatrix.
4292 
4293    The columns are logically partitioned with the n0 columns belonging
4294    to 0th partition, the next n1 columns belonging to the next
4295    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4296 
4297    The DIAGONAL portion of the local submatrix on any given processor
4298    is the submatrix corresponding to the rows and columns m,n
4299    corresponding to the given processor. i.e diagonal matrix on
4300    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4301    etc. The remaining portion of the local submatrix [m x (N-n)]
4302    constitute the OFF-DIAGONAL portion. The example below better
4303    illustrates this concept.
4304 
4305    For a square global matrix we define each processor's diagonal portion
4306    to be its local rows and the corresponding columns (a square submatrix);
4307    each processor's off-diagonal portion encompasses the remainder of the
4308    local matrix (a rectangular submatrix).
4309 
4310    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4311 
4312    When calling this routine with a single process communicator, a matrix of
4313    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4314    type of communicator, use the construction mechanism
4315 .vb
4316      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4317 .ve
4318 
4319 $     MatCreate(...,&A);
4320 $     MatSetType(A,MATMPIAIJ);
4321 $     MatSetSizes(A, m,n,M,N);
4322 $     MatMPIAIJSetPreallocation(A,...);
4323 
4324    By default, this format uses inodes (identical nodes) when possible.
4325    We search for consecutive rows with the same nonzero structure, thereby
4326    reusing matrix information to achieve increased efficiency.
4327 
4328    Options Database Keys:
4329 +  -mat_no_inode  - Do not use inodes
4330 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4331 -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4332         See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
4333         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4334 
4335    Example usage:
4336 
4337    Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4339    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4340    as follows
4341 
4342 .vb
4343             1  2  0  |  0  3  0  |  0  4
4344     Proc0   0  5  6  |  7  0  0  |  8  0
4345             9  0 10  | 11  0  0  | 12  0
4346     -------------------------------------
4347            13  0 14  | 15 16 17  |  0  0
4348     Proc1   0 18  0  | 19 20 21  |  0  0
4349             0  0  0  | 22 23  0  | 24  0
4350     -------------------------------------
4351     Proc2  25 26 27  |  0  0 28  | 29  0
4352            30  0  0  | 31 32 33  |  0 34
4353 .ve
4354 
4355    This can be represented as a collection of submatrices as
4356 
4357 .vb
4358       A B C
4359       D E F
4360       G H I
4361 .ve
4362 
4363    Where the submatrices A,B,C are owned by proc0, D,E,F are
4364    owned by proc1, G,H,I are owned by proc2.
4365 
4366    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4367    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4368    The 'M','N' parameters are 8,8, and have the same values on all procs.
4369 
4370    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4371    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4372    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4376 
4377    When d_nz, o_nz parameters are specified, d_nz storage elements are
4378    allocated for every row of the local diagonal submatrix, and o_nz
4379    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
4381    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4382    In this case, the values of d_nz,o_nz are
4383 .vb
4384      proc0 : dnz = 2, o_nz = 2
4385      proc1 : dnz = 3, o_nz = 2
4386      proc2 : dnz = 1, o_nz = 4
4387 .ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
4391    34 values.
4392 
4393    When d_nnz, o_nnz parameters are specified, the storage is specified
4394    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4395    In the above case the values for d_nnz,o_nnz are
4396 .vb
4397      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4398      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4399      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4400 .ve
4401    Here the space allocated is sum of all the above values i.e 34, and
4402    hence pre-allocation is perfect.
4403 
4404    Level: intermediate
4405 
4406 .seealso: [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4407           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
4408 @*/
4409 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) {
4410   PetscMPIInt size;
4411 
4412   PetscFunctionBegin;
4413   PetscCall(MatCreate(comm, A));
4414   PetscCall(MatSetSizes(*A, m, n, M, N));
4415   PetscCallMPI(MPI_Comm_size(comm, &size));
4416   if (size > 1) {
4417     PetscCall(MatSetType(*A, MATMPIAIJ));
4418     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4419   } else {
4420     PetscCall(MatSetType(*A, MATSEQAIJ));
4421     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4422   }
4423   PetscFunctionReturn(0);
4424 }
4425 
4426 /*@C
4427   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4428 
4429   Not collective
4430 
4431   Input Parameter:
4432 . A - The `MATMPIAIJ` matrix
4433 
4434   Output Parameters:
4435 + Ad - The local diagonal block as a `MATSEQAIJ` matrix
4436 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix
4437 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4438 
4439   Note:
  The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
4442   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4443   local column numbers to global column numbers in the original matrix.
4444 
4445   Level: intermediate
4446 
4447 .seealso: `MATMPIAIJ`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ`
4448 @*/
4449 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) {
4450   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4451   PetscBool   flg;
4452 
4453   PetscFunctionBegin;
4454   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4455   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4456   if (Ad) *Ad = a->A;
4457   if (Ao) *Ao = a->B;
4458   if (colmap) *colmap = a->garray;
4459   PetscFunctionReturn(0);
4460 }
4461 
/* Builds (or refills) a parallel matrix *outmat on comm by stacking the rows of each
   rank's sequential matrix inmat in rank order.  n is the number of local columns
   (or PETSC_DECIDE).  With MAT_INITIAL_MATRIX the layout and preallocation are
   computed first; with MAT_REUSE_MATRIX only the numeric values are re-inserted
   into the existing *outmat. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) {
  PetscInt     m, N, i, rstart, nnz, Ii;
  PetscInt    *indx;
  PetscScalar *values;
  MatType      rootType;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz, *onz, sum, bs, cbs;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    /* first global row owned by this rank: exclusive prefix sum of the local row counts */
    PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
    rstart -= m;

    /* count diagonal/off-diagonal nonzeros per row for preallocation */
    MatPreallocateBegin(comm, m, n, dnz, onz);
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
      PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
    }

    PetscCall(MatCreate(comm, outmat));
    PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
    PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
    PetscCall(MatGetRootType_Private(inmat, &rootType));
    PetscCall(MatSetType(*outmat, rootType));
    /* both preallocation calls are made; only the one matching the type set above takes effect */
    PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
    MatPreallocateEnd(dnz, onz);
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase */
  PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
    Ii = i + rstart;
    PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
  }
  PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
4512 
4513 PetscErrorCode MatFileSplit(Mat A, char *outfile) {
4514   PetscMPIInt        rank;
4515   PetscInt           m, N, i, rstart, nnz;
4516   size_t             len;
4517   const PetscInt    *indx;
4518   PetscViewer        out;
4519   char              *name;
4520   Mat                B;
4521   const PetscScalar *values;
4522 
4523   PetscFunctionBegin;
4524   PetscCall(MatGetLocalSize(A, &m, NULL));
4525   PetscCall(MatGetSize(A, NULL, &N));
4526   /* Should this be the type of the diagonal block of A? */
4527   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
4528   PetscCall(MatSetSizes(B, m, N, m, N));
4529   PetscCall(MatSetBlockSizesFromMats(B, A, A));
4530   PetscCall(MatSetType(B, MATSEQAIJ));
4531   PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
4532   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
4533   for (i = 0; i < m; i++) {
4534     PetscCall(MatGetRow(A, i + rstart, &nnz, &indx, &values));
4535     PetscCall(MatSetValues(B, 1, &i, nnz, indx, values, INSERT_VALUES));
4536     PetscCall(MatRestoreRow(A, i + rstart, &nnz, &indx, &values));
4537   }
4538   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
4539   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
4540 
4541   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
4542   PetscCall(PetscStrlen(outfile, &len));
4543   PetscCall(PetscMalloc1(len + 6, &name));
4544   PetscCall(PetscSNPrintf(name, len + 6, "%s.%d", outfile, rank));
4545   PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, name, FILE_MODE_APPEND, &out));
4546   PetscCall(PetscFree(name));
4547   PetscCall(MatView(B, out));
4548   PetscCall(PetscViewerDestroy(&out));
4549   PetscCall(MatDestroy(&B));
4550   PetscFunctionReturn(0);
4551 }
4552 
/* Container destructor for the Mat_Merge_SeqsToMPI bookkeeping attached to
   matrices built by MatCreateMPIAIJSumSeqAIJSymbolic(); frees all merge arrays. */
static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) {
  Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;

  PetscFunctionBegin;
  if (!merge) PetscFunctionReturn(0);
  PetscCall(PetscFree(merge->id_r));
  PetscCall(PetscFree(merge->len_s));
  PetscCall(PetscFree(merge->len_r));
  PetscCall(PetscFree(merge->bi));
  PetscCall(PetscFree(merge->bj));
  /* buf_ri/buf_rj are pointer arrays whose payload was presumably allocated as a
     single chunk anchored at element 0 (PetscPostIrecvInt convention) — hence
     only [0] is freed before the pointer array itself */
  PetscCall(PetscFree(merge->buf_ri[0]));
  PetscCall(PetscFree(merge->buf_ri));
  PetscCall(PetscFree(merge->buf_rj[0]));
  PetscCall(PetscFree(merge->buf_rj));
  PetscCall(PetscFree(merge->coi));
  PetscCall(PetscFree(merge->coj));
  PetscCall(PetscFree(merge->owners_co));
  PetscCall(PetscLayoutDestroy(&merge->rowmap));
  PetscCall(PetscFree(merge));
  PetscFunctionReturn(0);
}
4574 
4575 #include <../src/mat/utils/freespace.h>
4576 #include <petscbt.h>
4577 
/* Numeric phase of MatCreateMPIAIJSumSeqAIJ(): fills mpimat (previously created by
   MatCreateMPIAIJSumSeqAIJSymbolic()) with the element-wise sum of the per-rank
   sequential matrices seqmat.  The merge bookkeeping (row map, send/recv lengths,
   received ij-structures) is retrieved from the "MatMergeSeqsToMPI" container
   composed on mpimat by the symbolic phase. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) {
  MPI_Comm             comm;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, taga, *len_s;
  PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj;
  PetscInt             proc, m;
  PetscInt           **buf_ri, **buf_rj;
  PetscInt             k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
  PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
  MPI_Request         *s_waits, *r_waits;
  MPI_Status          *status;
  const MatScalar     *aa, *a_a;
  MatScalar          **abuf_r, *ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));

  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* the merge data stored by the symbolic phase is mandatory here */
  PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container, (void **)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
  aa = a_a;

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  PetscCall(PetscMalloc1(size, &status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
  PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));

  PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    /* send the values of the rows of seqmat owned by [proc] (contiguous in aa) */
    i = owners[proc];
    PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  /*----------------------------*/
  PetscCall(PetscMalloc1(N, &ba_i));
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i = 0; i < m; i++) {
    arow = owners[rank] + i;
    bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
    bnzi = bi[i + 1] - bi[i];
    PetscCall(PetscArrayzero(ba_i, bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow + 1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* merge: both bj_i and aj are sorted, and aj is a subset of bj_i */
    for (j = 0; nextaj < anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k] + 1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j = 0; nextaj < anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++;
        nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
  PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));

  /* abuf_r payload is one chunk anchored at element 0 (PetscPostIrecvScalar) */
  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
  PetscFunctionReturn(0);
}
4697 
/* Symbolic phase of MatCreateMPIAIJSumSeqAIJ(): determines the nonzero structure of
   the parallel sum of the per-rank sequential matrices seqmat, creates the (not yet
   assembled) MATMPIAIJ matrix *mpimat with matching preallocation, and attaches the
   merge bookkeeping needed by MatCreateMPIAIJSumSeqAIJNumeric() to *mpimat in a
   container named "MatMergeSeqsToMPI".  m/n are the desired local row/column counts
   (n may be PETSC_DECIDE). */
PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) {
  Mat                  B_mpi;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
  PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
  PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
  PetscInt             len, proc, *dnz, *onz, bs, cbs;
  PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
  PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
  MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList   free_space = NULL, current_space = NULL;
  PetscBT              lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));

  /* make sure it is a PETSc comm */
  /* note: the parameter comm is shadowed by the duplicated comm and released below */
  PetscCall(PetscCommDuplicate(comm, &comm, NULL));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  PetscCall(PetscNew(&merge));
  PetscCall(PetscMalloc1(size, &status));

  /* determine row ownership */
  /*---------------------------------------------------------*/
  PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
  PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
  PetscCall(PetscLayoutSetSize(merge->rowmap, M));
  PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
  PetscCall(PetscLayoutSetUp(merge->rowmap));
  PetscCall(PetscMalloc1(size, &len_si));
  PetscCall(PetscMalloc1(size, &merge->len_s));

  m      = merge->rowmap->n;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc = 0; proc < size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      /* nothing is sent to self; those rows are merged locally */
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc + 1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      /* count only the nonempty rows destined for [proc] */
      for (i = owners[proc]; i < owners[proc + 1]; i++) {
        if (ai[i + 1] > ai[i]) nrows++;
      }
      len_si[proc] = 2 * (nrows + 1);
      len += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
  PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  PetscCall(PetscCommGetNewTag(comm, &tagj));
  PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));

  /* post the Isend of j-structure */
  /*--------------------------------*/
  PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));

  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    i = owners[proc];
    PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));

  /* send and recv i-structure */
  /*---------------------------*/
  PetscCall(PetscCommGetNewTag(comm, &tagi));
  PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));

  PetscCall(PetscMalloc1(len + 1, &buf_s));
  buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc] / 2 - 1;
    buf_si_i    = buf_si + nrows + 1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i = owners[proc]; i < owners[proc + 1]; i++) {
      anzi = ai[i + 1] - ai[i];
      if (anzi) {
        buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
        nrows++;
      }
    }
    PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));

  PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
  for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));

  PetscCall(PetscFree(len_si));
  PetscCall(PetscFree(len_ri));
  PetscCall(PetscFree(rj_waits));
  PetscCall(PetscFree2(si_waits, sj_waits));
  PetscCall(PetscFree(ri_waits));
  PetscCall(PetscFree(buf_s));
  PetscCall(PetscFree(status));

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  PetscCall(PetscMalloc1(m + 1, &bi));
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N + 1;
  PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len = ai[owners[rank + 1]] - ai[owners[rank]];
  PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));

  current_space = free_space;

  /* determine symbolic info for each local row */
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  MatPreallocateBegin(comm, m, n, dnz, onz);
  len = 0;
  for (i = 0; i < m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow + 1] - ai[arow];
    aj   = a->j + ai[arow];
    PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) {            /* i-th row */
        anzi = *(nextai[k] + 1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
        bnzi += nlnk;
        nextrow[k]++;
        nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
    /* copy data into free space, then initialize lnk */
    PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
    PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));

    current_space->array += bnzi;
    current_space->local_used += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i + 1] = bi[i] + bnzi;
  }

  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));

  PetscCall(PetscMalloc1(bi[m] + 1, &bj));
  PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
  PetscCall(PetscLLDestroy(lnk, lnkbt));

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
  PetscCall(MatCreate(comm, &B_mpi));
  if (n == PETSC_DECIDE) {
    PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
  } else {
    PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
  }
  PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
  PetscCall(MatSetType(B_mpi, MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled = PETSC_FALSE;
  merge->bi        = bi;
  merge->bj        = bj;
  merge->buf_ri    = buf_ri;
  merge->buf_rj    = buf_rj;
  merge->coi       = NULL;
  merge->coj       = NULL;
  merge->owners_co = NULL;

  PetscCall(PetscCommDestroy(&comm));

  /* attach the supporting struct to B_mpi for reuse */
  PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
  PetscCall(PetscContainerSetPointer(container, merge));
  PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
  PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
  PetscCall(PetscContainerDestroy(&container));
  *mpimat = B_mpi;

  PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
  PetscFunctionReturn(0);
}
4942 
4943 /*@C
4944       MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
4945                  matrices from each processor
4946 
4947     Collective
4948 
4949    Input Parameters:
+    comm - the communicator the parallel matrix will live on
4951 .    seqmat - the input sequential matrices
4952 .    m - number of local rows (or `PETSC_DECIDE`)
4953 .    n - number of local columns (or `PETSC_DECIDE`)
4954 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
4955 
4956    Output Parameter:
4957 .    mpimat - the parallel matrix generated
4958 
4959     Level: advanced
4960 
4961    Note:
4962      The dimensions of the sequential matrix in each processor MUST be the same.
4963      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4964      destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat.
4965 @*/
4966 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) {
4967   PetscMPIInt size;
4968 
4969   PetscFunctionBegin;
4970   PetscCallMPI(MPI_Comm_size(comm, &size));
4971   if (size == 1) {
4972     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
4973     if (scall == MAT_INITIAL_MATRIX) {
4974       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
4975     } else {
4976       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
4977     }
4978     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
4979     PetscFunctionReturn(0);
4980   }
4981   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
4982   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
4983   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
4984   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
4985   PetscFunctionReturn(0);
4986 }
4987 
4988 /*@
4989      MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
4990           mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained
4991           with `MatGetSize()`
4992 
4993     Not Collective
4994 
   Input Parameter:
.    A - the matrix
4998 
4999    Output Parameter:
5000 .    A_loc - the local sequential matrix generated
5001 
5002     Level: developer
5003 
5004    Notes:
5005      In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5006 
5007      Destroy the matrix with `MatDestroy()`
5008 
5009 .seealso: `MatMPIAIJGetLocalMat()`
5010 @*/
5011 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) {
5012   PetscBool mpi;
5013 
5014   PetscFunctionBegin;
5015   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5016   if (mpi) {
5017     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5018   } else {
5019     *A_loc = A;
5020     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5021   }
5022   PetscFunctionReturn(0);
5023 }
5024 
5025 /*@
5026      MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5027           mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained
5028           with `MatGetSize()`
5029 
5030     Not Collective
5031 
5032    Input Parameters:
5033 +    A - the matrix
5034 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5035 
5036    Output Parameter:
5037 .    A_loc - the local sequential matrix generated
5038 
5039     Level: developer
5040 
5041    Notes:
5042      In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5043 
5044      When the communicator associated with A has size 1 and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of A.
5045      If `MAT_REUSE_MATRIX` is requested with comm size 1, `MatCopy`(Adiag,*A_loc,`SAME_NONZERO_PATTERN`) is called.
5046      This means that one can preallocate the proper sequential matrix first and then call this routine with `MAT_REUSE_MATRIX` to safely
5047      modify the values of the returned A_loc.
5048 
5049 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5050 @*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) {
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray; /* cmap: local column of the off-diagonal block -> global column */
  const PetscScalar *aa, *ba, *aav, *bav;
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  /* accept any type whose name has MATMPIAIJ as a prefix (e.g. derived device types) */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* uniprocessor: the diagonal block already holds the full local matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  a  = (Mat_SeqAIJ *)(mpimat->A)->data;
  b  = (Mat_SeqAIJ *)(mpimat->B)->data;
  /* CSR row pointers (i) and column indices (j) of the diagonal (a) and off-diagonal (b) blocks */
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  /* aa/ba walk through the values below; aav/bav keep the base pointers for the restore calls */
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* row lengths of the merged matrix = diag-block length + offdiag-block length, per row */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    /* merge each row in three pieces so global column indices come out ascending
       (relies on cmap[] mapping to increasing global columns within a row — standard AIJ layout) */
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal portion of A with global column < cstart */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A (global columns cstart .. cstart+n-1) */
      for (j = 0; j < ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* remaining off-diagonal portion of A with global column >= cstart */
      for (j = jo; j < ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* pattern is unchanged: rewrite only the values, walking in the same three-piece order */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A with global column < cstart */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* remaining off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(0);
}
5154 
5155 /*@
5156      MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5157           mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part
5158 
5159     Not Collective
5160 
5161    Input Parameters:
5162 +    A - the matrix
5163 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5164 
5165    Output Parameters:
5166 +    glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5167 -    A_loc - the local sequential matrix generated
5168 
5169     Level: developer
5170 
5171    Note:
5172      This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the off diagonal part (in its local ordering)
5173 
5174 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5175 @*/
5176 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) {
5177   Mat             Ao, Ad;
5178   const PetscInt *cmap;
5179   PetscMPIInt     size;
5180   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5181 
5182   PetscFunctionBegin;
5183   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5184   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5185   if (size == 1) {
5186     if (scall == MAT_INITIAL_MATRIX) {
5187       PetscCall(PetscObjectReference((PetscObject)Ad));
5188       *A_loc = Ad;
5189     } else if (scall == MAT_REUSE_MATRIX) {
5190       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5191     }
5192     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5193     PetscFunctionReturn(0);
5194   }
5195   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5196   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5197   if (f) {
5198     PetscCall((*f)(A, scall, glob, A_loc));
5199   } else {
5200     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5201     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5202     Mat_SeqAIJ        *c;
5203     PetscInt          *ai = a->i, *aj = a->j;
5204     PetscInt          *bi = b->i, *bj = b->j;
5205     PetscInt          *ci, *cj;
5206     const PetscScalar *aa, *ba;
5207     PetscScalar       *ca;
5208     PetscInt           i, j, am, dn, on;
5209 
5210     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5211     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5212     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5213     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5214     if (scall == MAT_INITIAL_MATRIX) {
5215       PetscInt k;
5216       PetscCall(PetscMalloc1(1 + am, &ci));
5217       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5218       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5219       ci[0] = 0;
5220       for (i = 0, k = 0; i < am; i++) {
5221         const PetscInt ncols_o = bi[i + 1] - bi[i];
5222         const PetscInt ncols_d = ai[i + 1] - ai[i];
5223         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5224         /* diagonal portion of A */
5225         for (j = 0; j < ncols_d; j++, k++) {
5226           cj[k] = *aj++;
5227           ca[k] = *aa++;
5228         }
5229         /* off-diagonal portion of A */
5230         for (j = 0; j < ncols_o; j++, k++) {
5231           cj[k] = dn + *bj++;
5232           ca[k] = *ba++;
5233         }
5234       }
5235       /* put together the new matrix */
5236       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5237       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5238       /* Since these are PETSc arrays, change flags to free them as necessary. */
5239       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5240       c->free_a  = PETSC_TRUE;
5241       c->free_ij = PETSC_TRUE;
5242       c->nonew   = 0;
5243       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5244     } else if (scall == MAT_REUSE_MATRIX) {
5245       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5246       for (i = 0; i < am; i++) {
5247         const PetscInt ncols_d = ai[i + 1] - ai[i];
5248         const PetscInt ncols_o = bi[i + 1] - bi[i];
5249         /* diagonal portion of A */
5250         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5251         /* off-diagonal portion of A */
5252         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5253       }
5254       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5255     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5256     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5257     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa));
5258     if (glob) {
5259       PetscInt cst, *gidx;
5260 
5261       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5262       PetscCall(PetscMalloc1(dn + on, &gidx));
5263       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5264       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5265       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5266     }
5267   }
5268   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5269   PetscFunctionReturn(0);
5270 }
5271 
5272 /*@C
5273      MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5274 
5275     Not Collective
5276 
5277    Input Parameters:
5278 +    A - the matrix
5279 .    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5280 -    row, col - index sets of rows and columns to extract (or NULL)
5281 
5282    Output Parameter:
5283 .    A_loc - the local sequential matrix generated
5284 
5285     Level: developer
5286 
5287 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5288 @*/
5289 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) {
5290   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5291   PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
5292   IS          isrowa, iscola;
5293   Mat        *aloc;
5294   PetscBool   match;
5295 
5296   PetscFunctionBegin;
5297   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
5298   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5299   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5300   if (!row) {
5301     start = A->rmap->rstart;
5302     end   = A->rmap->rend;
5303     PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
5304   } else {
5305     isrowa = *row;
5306   }
5307   if (!col) {
5308     start = A->cmap->rstart;
5309     cmap  = a->garray;
5310     nzA   = a->A->cmap->n;
5311     nzB   = a->B->cmap->n;
5312     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5313     ncols = 0;
5314     for (i = 0; i < nzB; i++) {
5315       if (cmap[i] < start) idx[ncols++] = cmap[i];
5316       else break;
5317     }
5318     imark = i;
5319     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
5320     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
5321     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
5322   } else {
5323     iscola = *col;
5324   }
5325   if (scall != MAT_INITIAL_MATRIX) {
5326     PetscCall(PetscMalloc1(1, &aloc));
5327     aloc[0] = *A_loc;
5328   }
5329   PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
5330   if (!col) { /* attach global id of condensed columns */
5331     PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
5332   }
5333   *A_loc = aloc[0];
5334   PetscCall(PetscFree(aloc));
5335   if (!row) PetscCall(ISDestroy(&isrowa));
5336   if (!col) PetscCall(ISDestroy(&iscola));
5337   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5338   PetscFunctionReturn(0);
5339 }
5340 
/*
 * Create a sequential AIJ matrix based on row indices: all columns of a row are extracted once that row is matched.
 * Rows may be local or remote. The routine is designed to be memory scalable, so that nothing it allocates is sized
 * by a global dimension.
 */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) {
  Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth;
  PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt            owner;
  PetscSFNode           *iremote, *oiremote;
  const PetscInt        *lrowindices;
  PetscSF                sf, osf;
  PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar     *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots (locally owned rows of P)
   * nrows is the number of leaves (requested rows, which may be local or remote)
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  /* per-row counts/offsets are stored interleaved: slot [2*i+0] is the diagonal block, [2*i+1] the off-diagonal */
  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i = 0; i < plocalsize; i++) {
    /* diag */
    nrcols[i * 2 + 0]         = pd->i[i + 1] - pd->i[i];
    /* off diag */
    nrcols[i * 2 + 1]         = po->i[i + 1] - po->i[i];
    /* compute offsets so that we know the relative location of each row's entries */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  /* total number of extracted entries, and pnnz[] = nonzeros per row of the output matrix */
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i = 0; i < nrows; i++) {
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol    = PetscMax(pnnz[i], ncol);
    /* diag */
    dntotalcols += nlcols[i * 2 + 0];
    /* off diag */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure out the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* second pair of SFs: one leaf per extracted nonzero; diag */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so ilocal needs to point into its single contiguous data array */
      ilocal[dntotalcols++]      = ntotalcols++;
    }
    /* off diag */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++]      = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* Off diag */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* We operate on the matrix internal data to save memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for diag matrix.
   * NOTE(review): pd->j and po->j of P are modified in place here and restored further below,
   * so P must not be accessed concurrently while this routine runs */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth to store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  /* undo the local-to-global conversion of po->j done above */
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse them later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(0);
}
5513 
5514 /*
5515  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5516  * This supports MPIAIJ and MAIJ
5517  * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp; /* maps (global off-diagonal column of A)/dof -> compressed index */
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   *  and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreate(&hamp));
    PetscCall(PetscHMapIResize(hamp, a->B->cmap->n));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      /* dividing by dof collapses dof consecutive columns onto one row of P (MAIJ case; dof=1 for plain AIJ) */
      key = a->garray[i] / dof;
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same key as the previous step (garray sorted), so reuse the last slot */
        mapping[i] = count - 1;
      }
    }
    /* map: off-diagonal column of A -> row of *P_oth */
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
    PetscCall(PetscHMapIGetSize(hamp, &htsize));
    PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ", htsize, count);
    /* the sorted unique keys become the (global) row indices of P to extract */
    PetscCall(PetscCalloc1(htsize, &rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    PetscCall(PetscSortInt(htsize, rowindices));
    PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
    /* In case the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that were attached to the matrix earlier.
     */
    const PetscScalar *pd_a, *po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
    PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
    /* Update values in place */
    PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
  PetscFunctionReturn(0);
}
5590 
5591 /*@C
5592   MatGetBrowsOfAcols - Returns `IS` that contain rows of B that equal to nonzero columns of local A
5593 
5594   Collective on A
5595 
5596   Input Parameters:
5597 + A - the first matrix in `MATMPIAIJ` format
5598 . B - the second matrix in `MATMPIAIJ` format
5599 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5600 
5601   Output Parameters:
5602 + rowb - On input index sets of rows of B to extract (or NULL), modified on output
5603 . colb - On input index sets of columns of B to extract (or NULL), modified on output
5604 - B_seq - the sequential matrix generated
5605 
5606   Level: developer
5607 
5608 @*/
5609 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) {
5610   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5611   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5612   IS          isrowb, iscolb;
5613   Mat        *bseq = NULL;
5614 
5615   PetscFunctionBegin;
5616   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5617     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5618   }
5619   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5620 
5621   if (scall == MAT_INITIAL_MATRIX) {
5622     start = A->cmap->rstart;
5623     cmap  = a->garray;
5624     nzA   = a->A->cmap->n;
5625     nzB   = a->B->cmap->n;
5626     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5627     ncols = 0;
5628     for (i = 0; i < nzB; i++) { /* row < local row index */
5629       if (cmap[i] < start) idx[ncols++] = cmap[i];
5630       else break;
5631     }
5632     imark = i;
5633     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5634     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5635     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5636     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5637   } else {
5638     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5639     isrowb = *rowb;
5640     iscolb = *colb;
5641     PetscCall(PetscMalloc1(1, &bseq));
5642     bseq[0] = *B_seq;
5643   }
5644   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5645   *B_seq = bseq[0];
5646   PetscCall(PetscFree(bseq));
5647   if (!rowb) {
5648     PetscCall(ISDestroy(&isrowb));
5649   } else {
5650     *rowb = isrowb;
5651   }
5652   if (!colb) {
5653     PetscCall(ISDestroy(&iscolb));
5654   } else {
5655     *colb = iscolb;
5656   }
5657   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5658   PetscFunctionReturn(0);
5659 }
5660 
5661 /*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5663     of the OFF-DIAGONAL portion of local A
5664 
5665     Collective on Mat
5666 
5667    Input Parameters:
5668 +    A,B - the matrices in mpiaij format
5669 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5670 
5671    Output Parameter:
5672 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5673 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5674 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5675 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5676 
5677     Developer Note:
5678     This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable.
5680 
5681     Level: developer
5682 
5683 */
PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) {
  Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *b_oth;
  VecScatter         ctx;
  MPI_Comm           comm;
  const PetscMPIInt *rprocs, *sprocs;          /* ranks we receive from / send to */
  const PetscInt    *srow, *rstarts, *sstarts; /* local rows to send; per-message offsets on recv/send sides */
  PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
  PetscInt           i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len;
  PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
  MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
  PetscMPIInt        size, tag, rank, nreqs;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* A's column layout must match B's row layout on every rank */
  if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) {
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  }
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* Sequential case: the off-diagonal block is empty, so there are no remote rows of B to fetch */
  if (size == 1) {
    /* NOTE(review): these two assignments write the by-value parameter copies, not the
       caller's variables, so they have no caller-visible effect -- confirm intent */
    startsj_s = NULL;
    bufa_ptr  = NULL;
    *B_oth    = NULL;
    PetscFunctionReturn(0);
  }

  /* Reuse the communication pattern of the matrix-vector product scatter (see Developer Note above) */
  ctx = a->Mvctx;
  tag = ((PetscObject)ctx)->tag;

  PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
  /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
  PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
  PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
  PetscCall(PetscMalloc1(nreqs, &reqs));
  rwaits = reqs;          /* receive requests occupy the front of reqs[] */
  swaits = reqs + nrecvs; /* send requests follow */

  /* Without caller-supplied storage for the communication maps, the pattern must be rebuilt */
  if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX) {
    /* i-array: exchange row lengths so each rank can build the row offsets of B_oth */
    /*---------*/
    /*  post receives */
    if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
    for (i = 0; i < nrecvs; i++) {
      rowlen = rvalues + rstarts[i] * rbs;
      nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
      PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
    }

    /* pack the outgoing message */
    PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));

    sstartsj[0] = 0;
    rstartsj[0] = 0;
    len         = 0; /* total length of j or a array to be sent */
    if (nsends) {
      k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
      PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
    }
    for (i = 0; i < nsends; i++) {
      rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
      nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
      for (j = 0; j < nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l = 0; l < sbs; l++) {
          PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */

          rowlen[j * sbs + l] = ncols;

          len += ncols;
          PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
        }
        k++;
      }
      PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));

      sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
    PetscCall(PetscFree(svalues));

    /* allocate buffers for sending j and a arrays */
    PetscCall(PetscMalloc1(len + 1, &bufj));
    PetscCall(PetscMalloc1(len + 1, &bufa));

    /* create i-array of B_oth */
    PetscCall(PetscMalloc1(aBn + 2, &b_othi));

    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i = 0; i < nrecvs; i++) {
      rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
      nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
      for (j = 0; j < nrows; j++) {
        b_othi[k + 1] = b_othi[k] + rowlen[j];
        PetscCall(PetscIntSumError(rowlen[j], len, &len)); /* len += rowlen[j], with overflow check */
        k++;
      }
      rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }
    PetscCall(PetscFree(rvalues));

    /* allocate space for j and a arrays of B_oth */
    PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
    PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));

    /* j-array: exchange global column indices of the requested rows */
    /*---------*/
    /*  post receives of j-array */
    for (i = 0; i < nrecvs; i++) {
      nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
      PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
    }

    /* pack the outgoing message j-array */
    if (nsends) k = sstarts[0];
    for (i = 0; i < nsends; i++) {
      nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
      bufJ  = bufj + sstartsj[i];
      for (j = 0; j < nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll = 0; ll < sbs; ll++) {
          PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
          for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
          PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
        }
      }
      PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
    }

    /* recvs and sends of j-array are completed */
    if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
  } else if (scall == MAT_REUSE_MATRIX) {
    /* Reuse the maps and send buffer saved by a previous MAT_INITIAL_MATRIX call; only values change */
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ *)(*B_oth)->data;
    PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); /* NOTE(review): message text looks inherited from unrelated code; the actual failure is an invalid MatReuse value -- confirm */

  /* a-array: exchange the numerical values of the requested rows */
  /*---------*/
  /*  post receives of a-array */
  for (i = 0; i < nrecvs; i++) {
    nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
    PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
  }

  /* pack the outgoing message a-array */
  if (nsends) k = sstarts[0];
  for (i = 0; i < nsends; i++) {
    nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
    bufA  = bufa + sstartsj[i];
    for (j = 0; j < nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll = 0; ll < sbs; ll++) {
        PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
        for (l = 0; l < ncols; l++) *bufA++ = vals[l];
        PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
      }
    }
    PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
  }
  /* recvs and sends of a-array are completed */
  if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
  PetscCall(PetscFree(reqs));

  if (scall == MAT_INITIAL_MATRIX) {
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    PetscCall(PetscFree(bufj));
    if (!startsj_s || !bufa_ptr) {
      PetscCall(PetscFree2(sstartsj, rstartsj));
      /* NOTE(review): bufa_ptr is NULL in at least the common path through this branch, making
         this a no-op while bufa is never freed; presumably PetscFree(bufa) was intended -- confirm */
      PetscCall(PetscFree(bufa_ptr));
    } else {
      /* Hand the maps and the send buffer to the caller for later MAT_REUSE_MATRIX calls */
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  } else if (scall == MAT_REUSE_MATRIX) {
    PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
  }

  PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
  PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
  PetscFunctionReturn(0);
}
5886 
5887 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
5888 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
5889 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
5890 #if defined(PETSC_HAVE_MKL_SPARSE)
5891 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
5892 #endif
5893 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
5894 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
5895 #if defined(PETSC_HAVE_ELEMENTAL)
5896 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
5897 #endif
5898 #if defined(PETSC_HAVE_SCALAPACK)
5899 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
5900 #endif
5901 #if defined(PETSC_HAVE_HYPRE)
5902 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
5903 #endif
5904 #if defined(PETSC_HAVE_CUDA)
5905 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
5906 #endif
5907 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
5908 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
5909 #endif
5910 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
5911 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
5912 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5913 
5914 /*
5915     Computes (B'*A')' since computing B*A directly is untenable
5916 
5917                n                       p                          p
5918         [             ]       [             ]         [                 ]
5919       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5920         [             ]       [             ]         [                 ]
5921 
5922 */
5923 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) {
5924   Mat At, Bt, Ct;
5925 
5926   PetscFunctionBegin;
5927   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
5928   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
5929   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
5930   PetscCall(MatDestroy(&At));
5931   PetscCall(MatDestroy(&Bt));
5932   PetscCall(MatTransposeSetPrecursor(Ct, C));
5933   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
5934   PetscCall(MatDestroy(&Ct));
5935   PetscFunctionReturn(0);
5936 }
5937 
5938 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) {
5939   PetscBool cisdense;
5940 
5941   PetscFunctionBegin;
5942   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
5943   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
5944   PetscCall(MatSetBlockSizesFromMats(C, A, B));
5945   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
5946   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
5947   PetscCall(MatSetUp(C));
5948 
5949   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5950   PetscFunctionReturn(0);
5951 }
5952 
5953 /* ----------------------------------------------------------------*/
5954 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) {
5955   Mat_Product *product = C->product;
5956   Mat          A = product->A, B = product->B;
5957 
5958   PetscFunctionBegin;
5959   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
5960     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5961 
5962   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
5963   C->ops->productsymbolic = MatProductSymbolic_AB;
5964   PetscFunctionReturn(0);
5965 }
5966 
5967 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) {
5968   Mat_Product *product = C->product;
5969 
5970   PetscFunctionBegin;
5971   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
5972   PetscFunctionReturn(0);
5973 }
5974 
5975 /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
5976 
5977   Input Parameters:
5978 
5979     j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
5980     j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)
5981 
5982     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
5983 
5984     For Set1, j1[] contains column indices of the nonzeros.
5985     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
5987     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
5988 
5989     Similar for Set2.
5990 
5991     This routine merges the two sets of nonzeros row by row and removes repeats.
5992 
5993   Output Parameters: (memory is allocated by the caller)
5994 
5995     i[],j[]: the CSR of the merged matrix, which has m rows.
5996     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
5997     imap2[]: similar to imap1[], but for Set2.
5998     Note we order nonzeros row-by-row and from left to right.
5999 */
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[]) {
  PetscInt   r, m; /* r: row index; m: number of local rows of mat */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged set respectively */
  i[0]        = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    /* Classic sorted-merge over this row's nonzeros of Set1 and Set2 */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump over the repeats to the next unique nonzero in Set1 */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump over the repeats to the next unique nonzero in Set2 */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) { /* Nonzero only in Set1 */
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else { /* Nonzero only in Set2 */
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    i[r + 1] = t; /* CSR row pointer of the merged matrix */
  }
  PetscFunctionReturn(0);
}
6056 
6057 /* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6058 
6059   Input Parameters:
6060     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6061     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6062       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6063 
6064       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6065       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6066 
6067   Output Parameters:
6068     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6069     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6070       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6071       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6072 
6073     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6074       Atot: number of entries belonging to the diagonal block.
6075       Annz: number of unique nonzeros belonging to the diagonal block.
6076       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6077         repeats (i.e., same 'i,j' pair).
6078       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6079         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6080 
6081       Atot: number of entries belonging to the diagonal block
6082       Annz: number of unique nonzeros belonging to the diagonal block.
6083 
6084     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6085 
6086     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6087 */
static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) {
  PetscInt    cstart, cend, rstart, rend, row, col;
  PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  k, m, p, q, r, s, mid;
  PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  m = rend - rstart; /* number of local rows */

  /* Negative row indices mark to-be-ignored entries, already sorted to the front of i[] */
  for (k = 0; k < n; k++) {
    if (i[k] >= 0) break;
  } /* Skip negative rows */

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k < n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s = k; s < n; s++)
      if (i[s] != row) break;
    for (p = k; p < s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1]  */
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]); /* NOTE(review): bound admits j[p] == N; confirm whether it should be strict */
    }
    /* Sort this row's columns (shifted diag entries sort first) carrying perm[] along */
    PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
    PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row - rstart] = k;
    rowMid[row - rstart]   = mid;
    rowEnd[row - rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag/offdiag row */
    for (p = k; p < mid;) {
      col = j[p]; /* col holds the (still shifted) index used to detect repeats */
      do {
        j[p] += PETSC_MAX_INT; /* Revert the shifted diag index back to its true value */
        p++;
      } while (p < mid && j[p] == col); /* Revert the modified diagonal indices */
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      do { p++; } while (p < s && j[p] == col); /* Skip repeats of this offdiag nonzero */
      Bnnz++;
    }
    k = s; /* Advance to the next row */
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot, &Aperm));
  PetscCall(PetscMalloc1(Btot, &Bperm));
  PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; /* Reuse the counters as running offsets */
  for (r = 0; r < m; r++) {
    k   = rowBegin[r];
    mid = rowMid[r];
    s   = rowEnd[r];
    PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k));
    PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p = k; p < mid;) {
      col = j[p];
      q   = p;
      do { p++; } while (p < mid && j[p] == col);
      Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); /* p - q = repeat count of this unique diag nonzero */
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      q   = p;
      do { p++; } while (p < s && j[p] == col);
      Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); /* p - q = repeat count of this unique offdiag nonzero */
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(0);
}
6189 
6190 /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6191 
6192   Input Parameters:
6193     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6194     nnz:  number of unique nonzeros in the merged matrix
6195     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
    jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6197 
6198   Output Parameter: (memory is allocated by the caller)
6199     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6200 
6201   Example:
6202     nnz1 = 4
6203     nnz  = 6
6204     imap = [1,3,4,5]
6205     jmap = [0,3,5,6,7]
6206    then,
6207     jmap_new = [0,0,3,3,5,6,7]
6208 */
6209 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) {
6210   PetscCount k, p;
6211 
6212   PetscFunctionBegin;
6213   jmap_new[0] = 0;
6214   p           = nnz;                /* p loops over jmap_new[] backwards */
6215   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6216     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6217   }
6218   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6219   PetscFunctionReturn(0);
6220 }
6221 
6222 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) {
6223   MPI_Comm    comm;
6224   PetscMPIInt rank, size;
6225   PetscInt    m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6226   PetscCount  k, p, q, rem;                           /* Loop variables over coo arrays */
6227   Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data;
6228 
6229   PetscFunctionBegin;
6230   PetscCall(PetscFree(mpiaij->garray));
6231   PetscCall(VecDestroy(&mpiaij->lvec));
6232 #if defined(PETSC_USE_CTABLE)
6233   PetscCall(PetscTableDestroy(&mpiaij->colmap));
6234 #else
6235   PetscCall(PetscFree(mpiaij->colmap));
6236 #endif
6237   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6238   mat->assembled     = PETSC_FALSE;
6239   mat->was_assembled = PETSC_FALSE;
6240   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
6241 
6242   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6243   PetscCallMPI(MPI_Comm_size(comm, &size));
6244   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6245   PetscCall(PetscLayoutSetUp(mat->rmap));
6246   PetscCall(PetscLayoutSetUp(mat->cmap));
6247   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6248   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6249   PetscCall(MatGetLocalSize(mat, &m, &n));
6250   PetscCall(MatGetSize(mat, &M, &N));
6251 
6252   /* ---------------------------------------------------------------------------*/
6253   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6254   /* entries come first, then local rows, then remote rows.                     */
6255   /* ---------------------------------------------------------------------------*/
6256   PetscCount n1 = coo_n, *perm1;
6257   PetscInt  *i1 = coo_i, *j1 = coo_j;
6258 
6259   PetscCall(PetscMalloc1(n1, &perm1));
6260   for (k = 0; k < n1; k++) perm1[k] = k;
6261 
6262   /* Manipulate indices so that entries with negative row or col indices will have smallest
6263      row indices, local entries will have greater but negative row indices, and remote entries
6264      will have positive row indices.
6265   */
6266   for (k = 0; k < n1; k++) {
6267     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT;                /* e.g., -2^31, minimal to move them ahead */
6268     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6269     else {
6270       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6271       if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6272     }
6273   }
6274 
6275   /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */
6276   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6277   for (k = 0; k < n1; k++) {
6278     if (i1[k] > PETSC_MIN_INT) break;
6279   }                                                                               /* Advance k to the first entry we need to take care of */
6280   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */
6281   for (; k < rem; k++) i1[k] += PETSC_MAX_INT;                                    /* Revert row indices of local rows*/
6282 
6283   /* ---------------------------------------------------------------------------*/
6284   /*           Split local rows into diag/offdiag portions                      */
6285   /* ---------------------------------------------------------------------------*/
6286   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6287   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1, *Cperm1;
6288   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6289 
6290   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6291   PetscCall(PetscMalloc1(n1 - rem, &Cperm1));
6292   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6293 
6294   /* ---------------------------------------------------------------------------*/
6295   /*           Send remote rows to their owner                                  */
6296   /* ---------------------------------------------------------------------------*/
6297   /* Find which rows should be sent to which remote ranks*/
6298   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6299   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6300   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6301   const PetscInt *ranges;
6302   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6303 
6304   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6305   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6306   for (k = rem; k < n1;) {
6307     PetscMPIInt owner;
6308     PetscInt    firstRow, lastRow;
6309 
6310     /* Locate a row range */
6311     firstRow = i1[k]; /* first row of this owner */
6312     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6313     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6314 
6315     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6316     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6317 
6318     /* All entries in [k,p) belong to this remote owner */
6319     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6320       PetscMPIInt *sendto2;
6321       PetscInt    *nentries2;
6322       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6323 
6324       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6325       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6326       PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1));
6327       PetscCall(PetscFree2(sendto, nentries2));
6328       sendto   = sendto2;
6329       nentries = nentries2;
6330       maxNsend = maxNsend2;
6331     }
6332     sendto[nsend]   = owner;
6333     nentries[nsend] = p - k;
6334     PetscCall(PetscCountCast(p - k, &nentries[nsend]));
6335     nsend++;
6336     k = p;
6337   }
6338 
6339   /* Build 1st SF to know offsets on remote to send data */
6340   PetscSF      sf1;
6341   PetscInt     nroots = 1, nroots2 = 0;
6342   PetscInt     nleaves = nsend, nleaves2 = 0;
6343   PetscInt    *offsets;
6344   PetscSFNode *iremote;
6345 
6346   PetscCall(PetscSFCreate(comm, &sf1));
6347   PetscCall(PetscMalloc1(nsend, &iremote));
6348   PetscCall(PetscMalloc1(nsend, &offsets));
6349   for (k = 0; k < nsend; k++) {
6350     iremote[k].rank  = sendto[k];
6351     iremote[k].index = 0;
6352     nleaves2 += nentries[k];
6353     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6354   }
6355   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6356   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6357   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6358   PetscCall(PetscSFDestroy(&sf1));
6359   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem);
6360 
6361   /* Build 2nd SF to send remote COOs to their owner */
6362   PetscSF sf2;
6363   nroots  = nroots2;
6364   nleaves = nleaves2;
6365   PetscCall(PetscSFCreate(comm, &sf2));
6366   PetscCall(PetscSFSetFromOptions(sf2));
6367   PetscCall(PetscMalloc1(nleaves, &iremote));
6368   p = 0;
6369   for (k = 0; k < nsend; k++) {
6370     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6371     for (q = 0; q < nentries[k]; q++, p++) {
6372       iremote[p].rank  = sendto[k];
6373       iremote[p].index = offsets[k] + q;
6374     }
6375   }
6376   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6377 
6378   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6379   PetscCall(PetscArraycpy(Cperm1, perm1 + rem, n1 - rem));
6380 
6381   /* Send the remote COOs to their owner */
6382   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6383   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6384   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6385   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6386   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE));
6387   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6388   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE));
6389 
6390   PetscCall(PetscFree(offsets));
6391   PetscCall(PetscFree2(sendto, nentries));
6392 
6393   /* ---------------------------------------------------------------*/
6394   /* Sort received COOs by row along with the permutation array     */
6395   /* ---------------------------------------------------------------*/
6396   for (k = 0; k < n2; k++) perm2[k] = k;
6397   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6398 
6399   /* ---------------------------------------------------------------*/
6400   /* Split received COOs into diag/offdiag portions                 */
6401   /* ---------------------------------------------------------------*/
6402   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6403   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6404   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6405 
6406   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6407   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6408 
6409   /* --------------------------------------------------------------------------*/
6410   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6411   /* --------------------------------------------------------------------------*/
6412   PetscInt *Ai, *Bi;
6413   PetscInt *Aj, *Bj;
6414 
6415   PetscCall(PetscMalloc1(m + 1, &Ai));
6416   PetscCall(PetscMalloc1(m + 1, &Bi));
6417   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6418   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6419 
6420   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6421   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6422   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6423   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6424   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6425 
6426   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6427   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6428 
6429   /* --------------------------------------------------------------------------*/
6430   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6431   /* expect nonzeros in A/B most likely have local contributing entries        */
6432   /* --------------------------------------------------------------------------*/
6433   PetscInt    Annz = Ai[m];
6434   PetscInt    Bnnz = Bi[m];
6435   PetscCount *Ajmap1_new, *Bjmap1_new;
6436 
6437   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6438   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6439 
6440   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6441   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6442 
6443   PetscCall(PetscFree(Aimap1));
6444   PetscCall(PetscFree(Ajmap1));
6445   PetscCall(PetscFree(Bimap1));
6446   PetscCall(PetscFree(Bjmap1));
6447   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6448   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6449   PetscCall(PetscFree(perm1));
6450   PetscCall(PetscFree3(i2, j2, perm2));
6451 
6452   Ajmap1 = Ajmap1_new;
6453   Bjmap1 = Bjmap1_new;
6454 
6455   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6456   if (Annz < Annz1 + Annz2) {
6457     PetscInt *Aj_new;
6458     PetscCall(PetscMalloc1(Annz, &Aj_new));
6459     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6460     PetscCall(PetscFree(Aj));
6461     Aj = Aj_new;
6462   }
6463 
6464   if (Bnnz < Bnnz1 + Bnnz2) {
6465     PetscInt *Bj_new;
6466     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6467     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6468     PetscCall(PetscFree(Bj));
6469     Bj = Bj_new;
6470   }
6471 
6472   /* --------------------------------------------------------------------------------*/
6473   /* Create new submatrices for on-process and off-process coupling                  */
6474   /* --------------------------------------------------------------------------------*/
6475   PetscScalar *Aa, *Ba;
6476   MatType      rtype;
6477   Mat_SeqAIJ  *a, *b;
6478   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6479   PetscCall(PetscCalloc1(Bnnz, &Ba));
6480   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6481   if (cstart) {
6482     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6483   }
6484   PetscCall(MatDestroy(&mpiaij->A));
6485   PetscCall(MatDestroy(&mpiaij->B));
6486   PetscCall(MatGetRootType_Private(mat, &rtype));
6487   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6488   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6489   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6490 
6491   a               = (Mat_SeqAIJ *)mpiaij->A->data;
6492   b               = (Mat_SeqAIJ *)mpiaij->B->data;
6493   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6494   a->free_a = b->free_a = PETSC_TRUE;
6495   a->free_ij = b->free_ij = PETSC_TRUE;
6496 
6497   /* conversion must happen AFTER multiply setup */
6498   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6499   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6500   PetscCall(VecDestroy(&mpiaij->lvec));
6501   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6502 
6503   mpiaij->coo_n   = coo_n;
6504   mpiaij->coo_sf  = sf2;
6505   mpiaij->sendlen = nleaves;
6506   mpiaij->recvlen = nroots;
6507 
6508   mpiaij->Annz = Annz;
6509   mpiaij->Bnnz = Bnnz;
6510 
6511   mpiaij->Annz2 = Annz2;
6512   mpiaij->Bnnz2 = Bnnz2;
6513 
6514   mpiaij->Atot1 = Atot1;
6515   mpiaij->Atot2 = Atot2;
6516   mpiaij->Btot1 = Btot1;
6517   mpiaij->Btot2 = Btot2;
6518 
6519   mpiaij->Ajmap1 = Ajmap1;
6520   mpiaij->Aperm1 = Aperm1;
6521 
6522   mpiaij->Bjmap1 = Bjmap1;
6523   mpiaij->Bperm1 = Bperm1;
6524 
6525   mpiaij->Aimap2 = Aimap2;
6526   mpiaij->Ajmap2 = Ajmap2;
6527   mpiaij->Aperm2 = Aperm2;
6528 
6529   mpiaij->Bimap2 = Bimap2;
6530   mpiaij->Bjmap2 = Bjmap2;
6531   mpiaij->Bperm2 = Bperm2;
6532 
6533   mpiaij->Cperm1 = Cperm1;
6534 
6535   /* Allocate in preallocation. If not used, it has zero cost on host */
6536   PetscCall(PetscMalloc2(mpiaij->sendlen, &mpiaij->sendbuf, mpiaij->recvlen, &mpiaij->recvbuf));
6537   PetscFunctionReturn(0);
6538 }
6539 
/* Insert/add the COO values v[] into the matrix, using the communication graph (coo_sf)
   and the permutation/compression maps that MatSetPreallocationCOO_MPIAIJ() cached in
   Mat_MPIAIJ. Local entries go straight into the diag (A) / offdiag (B) blocks; entries
   owned by other ranks are packed, sent via the SF, and folded in after communication. */
static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) {
  Mat_MPIAIJ       *mpiaij = (Mat_MPIAIJ *)mat->data;
  Mat               A = mpiaij->A, B = mpiaij->B;
  PetscCount        Annz = mpiaij->Annz, Annz2 = mpiaij->Annz2, Bnnz = mpiaij->Bnnz, Bnnz2 = mpiaij->Bnnz2;
  PetscScalar      *Aa, *Ba;
  PetscScalar      *sendbuf = mpiaij->sendbuf;
  PetscScalar      *recvbuf = mpiaij->recvbuf;
  /* jmap[i]..jmap[i+1] delimits the COO entries contributing to nonzero i; perm maps back into v[] */
  const PetscCount *Ajmap1 = mpiaij->Ajmap1, *Ajmap2 = mpiaij->Ajmap2, *Aimap2 = mpiaij->Aimap2;
  const PetscCount *Bjmap1 = mpiaij->Bjmap1, *Bjmap2 = mpiaij->Bjmap2, *Bimap2 = mpiaij->Bimap2;
  const PetscCount *Aperm1 = mpiaij->Aperm1, *Aperm2 = mpiaij->Aperm2, *Bperm1 = mpiaij->Bperm1, *Bperm2 = mpiaij->Bperm2;
  const PetscCount *Cperm1 = mpiaij->Cperm1; /* selects the v[] entries destined for remote ranks */

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
  PetscCall(MatSeqAIJGetArray(B, &Ba));

  /* Pack entries to be sent to remote */
  for (PetscCount i = 0; i < mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];

  /* Send remote entries to their owner and overlap the communication with local computation */
  PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
  /* Add local entries to A and B */
  for (PetscCount i = 0; i < Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
    PetscScalar sum = 0.0;                /* Do partial summation first to improve numerical stability */
    for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
    Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; /* INSERT_VALUES overwrites; ADD_VALUES accumulates */
  }
  for (PetscCount i = 0; i < Bnnz; i++) {
    PetscScalar sum = 0.0;
    for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
    Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
  }
  PetscCall(PetscSFReduceEnd(mpiaij->coo_sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));

  /* Add received remote entries to A and B; nonzeros touched here were already initialized above */
  for (PetscCount i = 0; i < Annz2; i++) {
    for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
  }
  for (PetscCount i = 0; i < Bnnz2; i++) {
    for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
  }
  PetscCall(MatSeqAIJRestoreArray(A, &Aa));
  PetscCall(MatSeqAIJRestoreArray(B, &Ba));
  PetscFunctionReturn(0);
}
6585 
6586 /* ----------------------------------------------------------------*/
6587 
6588 /*MC
6589    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6590 
6591    Options Database Keys:
6592 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6593 
6594    Level: beginner
6595 
6596    Notes:
6597     `MatSetValues()` may be called for this matrix type with a NULL argument for the numerical values,
6598     in this case the values associated with the rows and columns one passes in are set to zero
6599     in the matrix
6600 
    `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
6603 
6604 .seealso: `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6605 M*/
6606 
6607 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) {
6608   Mat_MPIAIJ *b;
6609   PetscMPIInt size;
6610 
6611   PetscFunctionBegin;
6612   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
6613 
6614   PetscCall(PetscNew(&b));
6615   B->data = (void *)b;
6616   PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
6617   B->assembled  = PETSC_FALSE;
6618   B->insertmode = NOT_SET_VALUES;
6619   b->size       = size;
6620 
6621   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
6622 
6623   /* build cache for off array entries formed */
6624   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
6625 
6626   b->donotstash  = PETSC_FALSE;
6627   b->colmap      = NULL;
6628   b->garray      = NULL;
6629   b->roworiented = PETSC_TRUE;
6630 
6631   /* stuff used for matrix vector multiply */
6632   b->lvec  = NULL;
6633   b->Mvctx = NULL;
6634 
6635   /* stuff for MatGetRow() */
6636   b->rowindices   = NULL;
6637   b->rowvalues    = NULL;
6638   b->getrowactive = PETSC_FALSE;
6639 
6640   /* flexible pointer used in CUSPARSE classes */
6641   b->spptr = NULL;
6642 
6643   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
6644   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
6645   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
6646   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
6647   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
6648   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
6649   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
6650   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
6651   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
6652   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
6653 #if defined(PETSC_HAVE_CUDA)
6654   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
6655 #endif
6656 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6657   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
6658 #endif
6659 #if defined(PETSC_HAVE_MKL_SPARSE)
6660   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
6661 #endif
6662   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
6663   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
6664   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
6665   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
6666 #if defined(PETSC_HAVE_ELEMENTAL)
6667   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
6668 #endif
6669 #if defined(PETSC_HAVE_SCALAPACK)
6670   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
6671 #endif
6672   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
6673   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
6674 #if defined(PETSC_HAVE_HYPRE)
6675   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
6676   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
6677 #endif
6678   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
6679   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
6680   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
6681   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
6682   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
6683   PetscFunctionReturn(0);
6684 }
6685 
6686 /*@C
6687      MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6688          and "off-diagonal" part of the matrix in CSR format.
6689 
6690    Collective
6691 
6692    Input Parameters:
6693 +  comm - MPI communicator
6694 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
       it calculated if N is given) For square matrices n is almost always m.
6698 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
6699 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
6700 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6701 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
6702 .   a - matrix values
6703 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6704 .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
6705 -   oa - matrix values
6706 
6707    Output Parameter:
6708 .   mat - the matrix
6709 
6710    Level: advanced
6711 
6712    Notes:
6713        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6714        must free the arrays once the matrix has been destroyed and not before.
6715 
6716        The i and j indices are 0 based
6717 
6718        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6719 
6720        This sets local rows and cannot be used to set off-processor values.
6721 
6722        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6723        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6724        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6725        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6726        keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6727        communication if it is known that only local entries will be set.
6728 
6729 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6730           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6731 @*/
6732 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) {
6733   Mat_MPIAIJ *maij;
6734 
6735   PetscFunctionBegin;
6736   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
6737   PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
6738   PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
6739   PetscCall(MatCreate(comm, mat));
6740   PetscCall(MatSetSizes(*mat, m, n, M, N));
6741   PetscCall(MatSetType(*mat, MATMPIAIJ));
6742   maij = (Mat_MPIAIJ *)(*mat)->data;
6743 
6744   (*mat)->preallocated = PETSC_TRUE;
6745 
6746   PetscCall(PetscLayoutSetUp((*mat)->rmap));
6747   PetscCall(PetscLayoutSetUp((*mat)->cmap));
6748 
6749   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
6750   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));
6751 
6752   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
6753   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
6754   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
6755   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
6756   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
6757   PetscFunctionReturn(0);
6758 }
6759 
/* Product context for backend (e.g. CUSPARSE) MatMat products on MPIAIJ matrices;
   attached to C->product->data and freed by MatDestroy_MatMatMPIAIJBACKEND() */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt   cp;    /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r;
  PetscScalar *bufa;
  Mat          P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;            /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;

  /* customization */
  PetscBool abmerge;    /* merge product->B's diag/offdiag blocks for MATPRODUCT_AB */
  PetscBool P_oth_bind; /* bind P_oth to CPU */
} MatMatMPIAIJBACKEND;
6790 
6791 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) {
6792   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
6793   PetscInt             i;
6794 
6795   PetscFunctionBegin;
6796   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
6797   PetscCall(PetscFree(mmdata->bufa));
6798   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
6799   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
6800   PetscCall(MatDestroy(&mmdata->P_oth));
6801   PetscCall(MatDestroy(&mmdata->Bloc));
6802   PetscCall(PetscSFDestroy(&mmdata->sf));
6803   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
6804   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
6805   PetscCall(PetscFree(mmdata->own[0]));
6806   PetscCall(PetscFree(mmdata->own));
6807   PetscCall(PetscFree(mmdata->off[0]));
6808   PetscCall(PetscFree(mmdata->off));
6809   PetscCall(PetscFree(mmdata));
6810   PetscFunctionReturn(0);
6811 }
6812 
6813 /* Copy selected n entries with indices in idx[] of A to v[].
6814    If idx is NULL, copy the whole data array of A to v[]
6815  */
6816 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) {
6817   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
6818 
6819   PetscFunctionBegin;
6820   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
6821   if (f) {
6822     PetscCall((*f)(A, n, idx, v));
6823   } else {
6824     const PetscScalar *vv;
6825 
6826     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
6827     if (n && idx) {
6828       PetscScalar    *w  = v;
6829       const PetscInt *oi = idx;
6830       PetscInt        j;
6831 
6832       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6833     } else {
6834       PetscCall(PetscArraycpy(v, vv, n));
6835     }
6836     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
6837   }
6838   PetscFunctionReturn(0);
6839 }
6840 
/* Numeric phase of a backend MatMat product: refresh the temporary operands,
   run the numeric kernels of all intermediate products, then scatter their
   values (via the COO maps built in the symbolic phase) into C with MatSetValuesCOO(). */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) {
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o; /* running offsets into coo_v (on-process) and coo_w (off-process) */

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  mmdata->reusesym = PETSC_FALSE; /* first reuse after the symbolic phase is now consumed */

  /* run the numeric kernel of every intermediate product */
  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* harvest values of non-temporary products into the COO buffers */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i + 1] - mmdata->off[i]; /* number of off-process values from mp[i] */

    if (mmdata->mptmp[i]) continue;
    if (noff) {
      PetscInt nown = mmdata->own[i + 1] - mmdata->own[i]; /* number of on-process values from mp[i] */

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;

      /* all of mp[i]'s nonzeros are on-process: copy its whole value array */
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    /* gather the off-process values (coo_w) to the tail of coo_v on their owners */
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
  }
  PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
  PetscFunctionReturn(0);
}
6884 
6885 /* Support for Pt * A, A * P, or Pt * A * P */
6886 #define MAX_NUMBER_INTERMEDIATE 4
6887 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) {
6888   Mat_Product           *product = C->product;
6889   Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
6890   Mat_MPIAIJ            *a, *p;
6891   MatMatMPIAIJBACKEND   *mmdata;
6892   ISLocalToGlobalMapping P_oth_l2g = NULL;
6893   IS                     glob      = NULL;
6894   const char            *prefix;
6895   char                   pprefix[256];
6896   const PetscInt        *globidx, *P_oth_idx;
6897   PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
6898   PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
6899   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE];  /* col/row map type for each Mat in mp[]. */
6900                                                                                           /* type-0: consecutive, start from 0; type-1: consecutive with */
6901                                                                                           /* a base offset; type-2: sparse with a local to global map table */
6902   const PetscInt        *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */
6903 
6904   MatProductType ptype;
6905   PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iskokk;
6906   PetscMPIInt    size;
6907 
6908   PetscFunctionBegin;
6909   MatCheckProduct(C, 1);
6910   PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
6911   ptype = product->type;
6912   if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
6913     ptype                                          = MATPRODUCT_AB;
6914     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
6915   }
6916   switch (ptype) {
6917   case MATPRODUCT_AB:
6918     A          = product->A;
6919     P          = product->B;
6920     m          = A->rmap->n;
6921     n          = P->cmap->n;
6922     M          = A->rmap->N;
6923     N          = P->cmap->N;
6924     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
6925     break;
6926   case MATPRODUCT_AtB:
6927     P          = product->A;
6928     A          = product->B;
6929     m          = P->cmap->n;
6930     n          = A->cmap->n;
6931     M          = P->cmap->N;
6932     N          = A->cmap->N;
6933     hasoffproc = PETSC_TRUE;
6934     break;
6935   case MATPRODUCT_PtAP:
6936     A          = product->A;
6937     P          = product->B;
6938     m          = P->cmap->n;
6939     n          = P->cmap->n;
6940     M          = P->cmap->N;
6941     N          = P->cmap->N;
6942     hasoffproc = PETSC_TRUE;
6943     break;
6944   default: SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
6945   }
6946   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
6947   if (size == 1) hasoffproc = PETSC_FALSE;
6948 
6949   /* defaults */
6950   for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
6951     mp[i]    = NULL;
6952     mptmp[i] = PETSC_FALSE;
6953     rmapt[i] = -1;
6954     cmapt[i] = -1;
6955     rmapa[i] = NULL;
6956     cmapa[i] = NULL;
6957   }
6958 
6959   /* customization */
6960   PetscCall(PetscNew(&mmdata));
6961   mmdata->reusesym = product->api_user;
6962   if (ptype == MATPRODUCT_AB) {
6963     if (product->api_user) {
6964       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
6965       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
6966       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
6967       PetscOptionsEnd();
6968     } else {
6969       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
6970       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
6971       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
6972       PetscOptionsEnd();
6973     }
6974   } else if (ptype == MATPRODUCT_PtAP) {
6975     if (product->api_user) {
6976       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
6977       PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
6978       PetscOptionsEnd();
6979     } else {
6980       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
6981       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
6982       PetscOptionsEnd();
6983     }
6984   }
6985   a = (Mat_MPIAIJ *)A->data;
6986   p = (Mat_MPIAIJ *)P->data;
6987   PetscCall(MatSetSizes(C, m, n, M, N));
6988   PetscCall(PetscLayoutSetUp(C->rmap));
6989   PetscCall(PetscLayoutSetUp(C->cmap));
6990   PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6991   PetscCall(MatGetOptionsPrefix(C, &prefix));
6992 
6993   cp = 0;
6994   switch (ptype) {
6995   case MATPRODUCT_AB: /* A * P */
6996     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
6997 
6998     /* A_diag * P_local (merged or not) */
6999     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7000       /* P is product->B */
7001       PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7002       PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7003       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7004       PetscCall(MatProductSetFill(mp[cp], product->fill));
7005       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7006       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7007       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7008       mp[cp]->product->api_user = product->api_user;
7009       PetscCall(MatProductSetFromOptions(mp[cp]));
7010       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7011       PetscCall(ISGetIndices(glob, &globidx));
7012       rmapt[cp] = 1;
7013       cmapt[cp] = 2;
7014       cmapa[cp] = globidx;
7015       mptmp[cp] = PETSC_FALSE;
7016       cp++;
7017     } else { /* A_diag * P_diag and A_diag * P_off */
7018       PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
7019       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7020       PetscCall(MatProductSetFill(mp[cp], product->fill));
7021       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7022       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7023       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7024       mp[cp]->product->api_user = product->api_user;
7025       PetscCall(MatProductSetFromOptions(mp[cp]));
7026       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7027       rmapt[cp] = 1;
7028       cmapt[cp] = 1;
7029       mptmp[cp] = PETSC_FALSE;
7030       cp++;
7031       PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
7032       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7033       PetscCall(MatProductSetFill(mp[cp], product->fill));
7034       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7035       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7036       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7037       mp[cp]->product->api_user = product->api_user;
7038       PetscCall(MatProductSetFromOptions(mp[cp]));
7039       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7040       rmapt[cp] = 1;
7041       cmapt[cp] = 2;
7042       cmapa[cp] = p->garray;
7043       mptmp[cp] = PETSC_FALSE;
7044       cp++;
7045     }
7046 
7047     /* A_off * P_other */
7048     if (mmdata->P_oth) {
7049       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
7050       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7051       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
7052       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7053       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7054       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7055       PetscCall(MatProductSetFill(mp[cp], product->fill));
7056       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7057       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7058       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7059       mp[cp]->product->api_user = product->api_user;
7060       PetscCall(MatProductSetFromOptions(mp[cp]));
7061       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7062       rmapt[cp] = 1;
7063       cmapt[cp] = 2;
7064       cmapa[cp] = P_oth_idx;
7065       mptmp[cp] = PETSC_FALSE;
7066       cp++;
7067     }
7068     break;
7069 
7070   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7071     /* A is product->B */
7072     PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7073     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7074       PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
7075       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7076       PetscCall(MatProductSetFill(mp[cp], product->fill));
7077       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7078       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7079       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7080       mp[cp]->product->api_user = product->api_user;
7081       PetscCall(MatProductSetFromOptions(mp[cp]));
7082       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7083       PetscCall(ISGetIndices(glob, &globidx));
7084       rmapt[cp] = 2;
7085       rmapa[cp] = globidx;
7086       cmapt[cp] = 2;
7087       cmapa[cp] = globidx;
7088       mptmp[cp] = PETSC_FALSE;
7089       cp++;
7090     } else {
7091       PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
7092       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7093       PetscCall(MatProductSetFill(mp[cp], product->fill));
7094       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7095       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7096       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7097       mp[cp]->product->api_user = product->api_user;
7098       PetscCall(MatProductSetFromOptions(mp[cp]));
7099       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7100       PetscCall(ISGetIndices(glob, &globidx));
7101       rmapt[cp] = 1;
7102       cmapt[cp] = 2;
7103       cmapa[cp] = globidx;
7104       mptmp[cp] = PETSC_FALSE;
7105       cp++;
7106       PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
7107       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7108       PetscCall(MatProductSetFill(mp[cp], product->fill));
7109       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7110       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7111       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7112       mp[cp]->product->api_user = product->api_user;
7113       PetscCall(MatProductSetFromOptions(mp[cp]));
7114       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7115       rmapt[cp] = 2;
7116       rmapa[cp] = p->garray;
7117       cmapt[cp] = 2;
7118       cmapa[cp] = globidx;
7119       mptmp[cp] = PETSC_FALSE;
7120       cp++;
7121     }
7122     break;
7123   case MATPRODUCT_PtAP:
7124     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7125     /* P is product->B */
7126     PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7127     PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7128     PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
7129     PetscCall(MatProductSetFill(mp[cp], product->fill));
7130     PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7131     PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7132     PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7133     mp[cp]->product->api_user = product->api_user;
7134     PetscCall(MatProductSetFromOptions(mp[cp]));
7135     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7136     PetscCall(ISGetIndices(glob, &globidx));
7137     rmapt[cp] = 2;
7138     rmapa[cp] = globidx;
7139     cmapt[cp] = 2;
7140     cmapa[cp] = globidx;
7141     mptmp[cp] = PETSC_FALSE;
7142     cp++;
7143     if (mmdata->P_oth) {
7144       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
7145       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7146       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
7147       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7148       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7149       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7150       PetscCall(MatProductSetFill(mp[cp], product->fill));
7151       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7152       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7153       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7154       mp[cp]->product->api_user = product->api_user;
7155       PetscCall(MatProductSetFromOptions(mp[cp]));
7156       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7157       mptmp[cp] = PETSC_TRUE;
7158       cp++;
7159       PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
7160       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7161       PetscCall(MatProductSetFill(mp[cp], product->fill));
7162       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7163       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7164       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7165       mp[cp]->product->api_user = product->api_user;
7166       PetscCall(MatProductSetFromOptions(mp[cp]));
7167       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7168       rmapt[cp] = 2;
7169       rmapa[cp] = globidx;
7170       cmapt[cp] = 2;
7171       cmapa[cp] = P_oth_idx;
7172       mptmp[cp] = PETSC_FALSE;
7173       cp++;
7174     }
7175     break;
7176   default: SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7177   }
7178   /* sanity check */
7179   if (size > 1)
7180     for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);
7181 
7182   PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
7183   for (i = 0; i < cp; i++) {
7184     mmdata->mp[i]    = mp[i];
7185     mmdata->mptmp[i] = mptmp[i];
7186   }
7187   mmdata->cp             = cp;
7188   C->product->data       = mmdata;
7189   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7190   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7191 
7192   /* memory type */
7193   mmdata->mtype = PETSC_MEMTYPE_HOST;
7194   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
7195   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
7196   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7197   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7198 
7199   /* prepare coo coordinates for values insertion */
7200 
7201   /* count total nonzeros of those intermediate seqaij Mats
7202     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7203     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7204     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7205   */
7206   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7207     Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
7208     if (mptmp[cp]) continue;
7209     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
7210       const PetscInt *rmap = rmapa[cp];
7211       const PetscInt  mr   = mp[cp]->rmap->n;
7212       const PetscInt  rs   = C->rmap->rstart;
7213       const PetscInt  re   = C->rmap->rend;
7214       const PetscInt *ii   = mm->i;
7215       for (i = 0; i < mr; i++) {
7216         const PetscInt gr = rmap[i];
7217         const PetscInt nz = ii[i + 1] - ii[i];
7218         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7219         else ncoo_oown += nz;                  /* this row is local */
7220       }
7221     } else ncoo_d += mm->nz;
7222   }
7223 
7224   /*
7225     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7226 
7227     ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.
7228 
    off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].
7230 
7231     off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
7232     own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
7233     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7234 
7235     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
    Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
7237   */
7238   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
7239   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));
7240 
7241   /* gather (i,j) of nonzeros inserted by remote procs */
7242   if (hasoffproc) {
7243     PetscSF  msf;
7244     PetscInt ncoo2, *coo_i2, *coo_j2;
7245 
7246     PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
7247     PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
7248     PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */
7249 
7250     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7251       Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
7252       PetscInt   *idxoff = mmdata->off[cp];
7253       PetscInt   *idxown = mmdata->own[cp];
7254       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7255         const PetscInt *rmap = rmapa[cp];
7256         const PetscInt *cmap = cmapa[cp];
7257         const PetscInt *ii   = mm->i;
7258         PetscInt       *coi  = coo_i + ncoo_o;
7259         PetscInt       *coj  = coo_j + ncoo_o;
7260         const PetscInt  mr   = mp[cp]->rmap->n;
7261         const PetscInt  rs   = C->rmap->rstart;
7262         const PetscInt  re   = C->rmap->rend;
7263         const PetscInt  cs   = C->cmap->rstart;
7264         for (i = 0; i < mr; i++) {
7265           const PetscInt *jj = mm->j + ii[i];
7266           const PetscInt  gr = rmap[i];
7267           const PetscInt  nz = ii[i + 1] - ii[i];
7268           if (gr < rs || gr >= re) { /* this is an offproc row */
7269             for (j = ii[i]; j < ii[i + 1]; j++) {
7270               *coi++    = gr;
7271               *idxoff++ = j;
7272             }
7273             if (!cmapt[cp]) { /* already global */
7274               for (j = 0; j < nz; j++) *coj++ = jj[j];
7275             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7276               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7277             } else { /* offdiag */
7278               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7279             }
7280             ncoo_o += nz;
7281           } else { /* this is a local row */
7282             for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
7283           }
7284         }
7285       }
7286       mmdata->off[cp + 1] = idxoff;
7287       mmdata->own[cp + 1] = idxown;
7288     }
7289 
7290     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7291     PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
7292     PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
7293     PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
7294     ncoo = ncoo_d + ncoo_oown + ncoo2;
7295     PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
7296     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7297     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
7298     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7299     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7300     PetscCall(PetscFree2(coo_i, coo_j));
7301     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7302     PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
7303     coo_i = coo_i2;
7304     coo_j = coo_j2;
7305   } else { /* no offproc values insertion */
7306     ncoo = ncoo_d;
7307     PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));
7308 
7309     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7310     PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
7311     PetscCall(PetscSFSetUp(mmdata->sf));
7312   }
7313   mmdata->hasoffproc = hasoffproc;
7314 
7315   /* gather (i,j) of nonzeros inserted locally */
7316   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7317     Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
7318     PetscInt       *coi  = coo_i + ncoo_d;
7319     PetscInt       *coj  = coo_j + ncoo_d;
7320     const PetscInt *jj   = mm->j;
7321     const PetscInt *ii   = mm->i;
7322     const PetscInt *cmap = cmapa[cp];
7323     const PetscInt *rmap = rmapa[cp];
7324     const PetscInt  mr   = mp[cp]->rmap->n;
7325     const PetscInt  rs   = C->rmap->rstart;
7326     const PetscInt  re   = C->rmap->rend;
7327     const PetscInt  cs   = C->cmap->rstart;
7328 
7329     if (mptmp[cp]) continue;
7330     if (rmapt[cp] == 1) { /* consecutive rows */
7331       /* fill coo_i */
7332       for (i = 0; i < mr; i++) {
7333         const PetscInt gr = i + rs;
7334         for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
7335       }
7336       /* fill coo_j */
7337       if (!cmapt[cp]) { /* type-0, already global */
7338         PetscCall(PetscArraycpy(coj, jj, mm->nz));
7339       } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
7340         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7341       } else {                                            /* type-2, local to global for sparse columns */
7342         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7343       }
7344       ncoo_d += mm->nz;
7345     } else if (rmapt[cp] == 2) { /* sparse rows */
7346       for (i = 0; i < mr; i++) {
7347         const PetscInt *jj = mm->j + ii[i];
7348         const PetscInt  gr = rmap[i];
7349         const PetscInt  nz = ii[i + 1] - ii[i];
7350         if (gr >= rs && gr < re) { /* local rows */
7351           for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
7352           if (!cmapt[cp]) { /* type-0, already global */
7353             for (j = 0; j < nz; j++) *coj++ = jj[j];
7354           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7355             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7356           } else { /* type-2, local to global for sparse columns */
7357             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7358           }
7359           ncoo_d += nz;
7360         }
7361       }
7362     }
7363   }
7364   if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
7365   PetscCall(ISDestroy(&glob));
7366   if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
7367   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7368   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7369   PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));
7370 
7371   /* preallocate with COO data */
7372   PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
7373   PetscCall(PetscFree2(coo_i, coo_j));
7374   PetscFunctionReturn(0);
7375 }
7376 
/*
   Selects the backend symbolic implementation for C = A*B, A^t*B, or P^t*A*P.

   On device-capable builds (PETSC_HAVE_DEVICE) the backend path is chosen only when
   A and B have the same matrix type, neither is bound to the CPU, and the user has
   not requested the CPU code path through the -*_backend_cpu options; otherwise the
   generic MPIAIJ product selection is used. Without device support the backend is
   always selected for the three supported product types.
*/
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) {
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match  = PETSC_FALSE; /* true when A and B share a type and neither is CPU-bound */
  PetscBool usecpu = PETSC_FALSE; /* true when the user explicitly requests the CPU code path */
#else
  PetscBool match = PETSC_TRUE; /* no device: always eligible for the backend */
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat, 1);
#if defined(PETSC_HAVE_DEVICE)
  /* eligible only if both operands stay off the CPU and B has the same type as A */
  if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
  if (match) { /* we can always fallback to the CPU if requested */
    /* the option name depends on whether the user entered through the classic API
       (MatMatMult() etc., product->api_user set) or through the MatProduct API */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    default: break;
    }
    match = (PetscBool)!usecpu; /* user-requested CPU fallback overrides eligibility */
  }
#endif
  if (match) {
    /* only the three product types implemented by the backend get its symbolic routine */
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP: mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; break;
    default: break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(0);
}
7442 
7443 /*
7444    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7445 
7446    n - the number of block indices in cc[]
7447    cc - the block indices (must be large enough to contain the indices)
7448 */
7449 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) {
7450   PetscInt        cnt = -1, nidx, j;
7451   const PetscInt *idx;
7452 
7453   PetscFunctionBegin;
7454   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7455   if (nidx) {
7456     cnt     = 0;
7457     cc[cnt] = idx[0] / bs;
7458     for (j = 1; j < nidx; j++) {
7459       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7460     }
7461   }
7462   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7463   *n = cnt + 1;
7464   PetscFunctionReturn(0);
7465 }
7466 
7467 /*
7468     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7469 
7470     ncollapsed - the number of block indices
7471     collapsed - the block indices (must be large enough to contain the indices)
7472 */
7473 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) {
7474   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7475 
7476   PetscFunctionBegin;
7477   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7478   for (i = start + 1; i < start + bs; i++) {
7479     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7480     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7481     cprevtmp = cprev;
7482     cprev    = merged;
7483     merged   = cprevtmp;
7484   }
7485   *ncollapsed = nprev;
7486   if (collapsed) *collapsed = cprev;
7487   PetscFunctionReturn(0);
7488 }
7489 
7490 /* -------------------------------------------------------------------------- */
7491 /*
7492  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7493 
 Input Parameters:
+ Amat - matrix
. symmetrize - make the result symmetric
- scale - scale with diagonal
7498 
7499  Output Parameter:
7500  . a_Gmat - output scalar graph >= 0
7501 
7502  */
7503 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, Mat *a_Gmat) {
7504   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7505   MPI_Comm  comm;
7506   Mat       Gmat;
7507   PetscBool ismpiaij, isseqaij;
7508   Mat       a, b, c;
7509   MatType   jtype;
7510 
7511   PetscFunctionBegin;
7512   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7513   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7514   PetscCall(MatGetSize(Amat, &MM, &NN));
7515   PetscCall(MatGetBlockSize(Amat, &bs));
7516   nloc = (Iend - Istart) / bs;
7517 
7518   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7519   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7520   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7521 
7522   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7523   /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
7524      implementation */
7525   if (bs > 1) {
7526     PetscCall(MatGetType(Amat, &jtype));
7527     PetscCall(MatCreate(comm, &Gmat));
7528     PetscCall(MatSetType(Gmat, jtype));
7529     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7530     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7531     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7532       PetscInt  *d_nnz, *o_nnz;
7533       MatScalar *aa, val, AA[4096];
7534       PetscInt  *aj, *ai, AJ[4096], nc;
7535       if (isseqaij) {
7536         a = Amat;
7537         b = NULL;
7538       } else {
7539         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7540         a             = d->A;
7541         b             = d->B;
7542       }
7543       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7544       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7545       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7546         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz, nmax = 0;
7547         const PetscInt *cols;
7548         for (PetscInt brow = 0, jj, ok = 1, j0; brow < nloc * bs; brow += bs) { // block rows
7549           PetscCall(MatGetRow(c, brow, &jj, &cols, NULL));
7550           nnz[brow / bs] = jj / bs;
7551           if (jj % bs) ok = 0;
7552           if (cols) j0 = cols[0];
7553           else j0 = -1;
7554           PetscCall(MatRestoreRow(c, brow, &jj, &cols, NULL));
7555           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
7556           for (PetscInt ii = 1; ii < bs && nnz[brow / bs]; ii++) { // check for non-dense blocks
7557             PetscCall(MatGetRow(c, brow + ii, &jj, &cols, NULL));
7558             if (jj % bs) ok = 0;
7559             if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0;
7560             if (nnz[brow / bs] != jj / bs) ok = 0;
7561             PetscCall(MatRestoreRow(c, brow + ii, &jj, &cols, NULL));
7562           }
7563           if (!ok) {
7564             PetscCall(PetscFree2(d_nnz, o_nnz));
7565             goto old_bs;
7566           }
7567         }
7568         PetscCheck(nmax < 4096, PETSC_COMM_SELF, PETSC_ERR_USER, "Buffer %" PetscInt_FMT " too small 4096.", nmax);
7569       }
7570       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7571       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7572       PetscCall(PetscFree2(d_nnz, o_nnz));
7573       // diag
7574       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
7575         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
7576         ai               = aseq->i;
7577         n                = ai[brow + 1] - ai[brow];
7578         aj               = aseq->j + ai[brow];
7579         for (int k = 0; k < n; k += bs) {        // block columns
7580           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
7581           val        = 0;
7582           for (int ii = 0; ii < bs; ii++) { // rows in block
7583             aa = aseq->a + ai[brow + ii] + k;
7584             for (int jj = 0; jj < bs; jj++) {         // columns in block
7585               val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7586             }
7587           }
7588           AA[k / bs] = val;
7589         }
7590         grow = Istart / bs + brow / bs;
7591         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES));
7592       }
7593       // off-diag
7594       if (ismpiaij) {
7595         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
7596         const PetscScalar *vals;
7597         const PetscInt    *cols, *garray = aij->garray;
7598         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
7599         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
7600           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
7601           for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
7602             AA[k / bs] = 0;
7603             AJ[cidx]   = garray[cols[k]] / bs;
7604           }
7605           nc = ncols / bs;
7606           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
7607           for (int ii = 0; ii < bs; ii++) { // rows in block
7608             PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7609             for (int k = 0; k < ncols; k += bs) {
7610               for (int jj = 0; jj < bs; jj++) { // cols in block
7611                 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7612               }
7613             }
7614             PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7615           }
7616           grow = Istart / bs + brow / bs;
7617           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES));
7618         }
7619       }
7620       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7621       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7622     } else {
7623       const PetscScalar *vals;
7624       const PetscInt    *idx;
7625       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
7626     old_bs:
7627       /*
7628        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7629        */
7630       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
7631       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7632       if (isseqaij) {
7633         PetscInt max_d_nnz;
7634         /*
7635          Determine exact preallocation count for (sequential) scalar matrix
7636          */
7637         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
7638         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7639         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7640         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7641         PetscCall(PetscFree3(w0, w1, w2));
7642       } else if (ismpiaij) {
7643         Mat             Daij, Oaij;
7644         const PetscInt *garray;
7645         PetscInt        max_d_nnz;
7646         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
7647         /*
7648          Determine exact preallocation count for diagonal block portion of scalar matrix
7649          */
7650         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
7651         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7652         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7653         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7654         PetscCall(PetscFree3(w0, w1, w2));
7655         /*
7656          Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
7657          */
7658         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7659           o_nnz[jj] = 0;
7660           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
7661             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7662             o_nnz[jj] += ncols;
7663             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7664           }
7665           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
7666         }
7667       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
7668       /* get scalar copy (norms) of matrix */
7669       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7670       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7671       PetscCall(PetscFree2(d_nnz, o_nnz));
7672       for (Ii = Istart; Ii < Iend; Ii++) {
7673         PetscInt dest_row = Ii / bs;
7674         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
7675         for (jj = 0; jj < ncols; jj++) {
7676           PetscInt    dest_col = idx[jj] / bs;
7677           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
7678           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
7679         }
7680         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
7681       }
7682       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7683       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7684     }
7685   } else {
7686     /* TODO GPU: optimization proposal, each class provides fast implementation of this
7687      procedure via MatAbs API */
7688     /* just copy scalar matrix & abs() */
7689     PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
7690     if (isseqaij) {
7691       a = Gmat;
7692       b = NULL;
7693     } else {
7694       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
7695       a             = d->A;
7696       b             = d->B;
7697     }
7698     /* abs */
7699     for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7700       MatInfo      info;
7701       PetscScalar *avals;
7702       PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
7703       PetscCall(MatSeqAIJGetArray(c, &avals));
7704       for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
7705       PetscCall(MatSeqAIJRestoreArray(c, &avals));
7706     }
7707   }
7708   if (symmetrize) {
7709     PetscBool isset, issym;
7710     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
7711     if (!isset || !issym) {
7712       Mat matTrans;
7713       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
7714       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
7715       PetscCall(MatDestroy(&matTrans));
7716     }
7717     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
7718   } else {
7719     PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
7720   }
7721   if (scale) {
7722     /* scale c for all diagonal values = 1 or -1 */
7723     Vec diag;
7724     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
7725     PetscCall(MatGetDiagonal(Gmat, diag));
7726     PetscCall(VecReciprocal(diag));
7727     PetscCall(VecSqrtAbs(diag));
7728     PetscCall(MatDiagonalScale(Gmat, diag, diag));
7729     PetscCall(VecDestroy(&diag));
7730   }
7731   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
7732   *a_Gmat = Gmat;
7733   PetscFunctionReturn(0);
7734 }
7735 
7736 PETSC_INTERN PetscErrorCode MatFilter_AIJ(Mat Gmat, PetscReal vfilter, Mat *filteredG) {
7737   PetscInt           Istart, Iend, ncols, nnz0, nnz1, NN, MM, nloc;
7738   Mat                tGmat;
7739   MPI_Comm           comm;
7740   const PetscScalar *vals;
7741   const PetscInt    *idx;
7742   PetscInt          *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols = 0;
7743   MatScalar         *AA; // this is checked in graph
7744   PetscBool          isseqaij;
7745   Mat                a, b, c;
7746   MatType            jtype;
7747 
7748   PetscFunctionBegin;
7749   PetscCall(PetscObjectGetComm((PetscObject)Gmat, &comm));
7750   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATSEQAIJ, &isseqaij));
7751   PetscCall(MatGetType(Gmat, &jtype));
7752   PetscCall(MatCreate(comm, &tGmat));
7753   PetscCall(MatSetType(tGmat, jtype));
7754 
7755   /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold?
7756                Also, if the matrix is symmetric, can we skip this
7757                operation? It can be very expensive on large matrices. */
7758 
7759   // global sizes
7760   PetscCall(MatGetSize(Gmat, &MM, &NN));
7761   PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend));
7762   nloc = Iend - Istart;
7763   PetscCall(PetscMalloc2(nloc, &d_nnz, nloc, &o_nnz));
7764   if (isseqaij) {
7765     a = Gmat;
7766     b = NULL;
7767   } else {
7768     Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
7769     a             = d->A;
7770     b             = d->B;
7771     garray        = d->garray;
7772   }
7773   /* Determine upper bound on non-zeros needed in new filtered matrix */
7774   for (PetscInt row = 0; row < nloc; row++) {
7775     PetscCall(MatGetRow(a, row, &ncols, NULL, NULL));
7776     d_nnz[row] = ncols;
7777     if (ncols > maxcols) maxcols = ncols;
7778     PetscCall(MatRestoreRow(a, row, &ncols, NULL, NULL));
7779   }
7780   if (b) {
7781     for (PetscInt row = 0; row < nloc; row++) {
7782       PetscCall(MatGetRow(b, row, &ncols, NULL, NULL));
7783       o_nnz[row] = ncols;
7784       if (ncols > maxcols) maxcols = ncols;
7785       PetscCall(MatRestoreRow(b, row, &ncols, NULL, NULL));
7786     }
7787   }
7788   PetscCall(MatSetSizes(tGmat, nloc, nloc, MM, MM));
7789   PetscCall(MatSetBlockSizes(tGmat, 1, 1));
7790   PetscCall(MatSeqAIJSetPreallocation(tGmat, 0, d_nnz));
7791   PetscCall(MatMPIAIJSetPreallocation(tGmat, 0, d_nnz, 0, o_nnz));
7792   PetscCall(MatSetOption(tGmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
7793   PetscCall(PetscFree2(d_nnz, o_nnz));
7794   //
7795   PetscCall(PetscMalloc2(maxcols, &AA, maxcols, &AJ));
7796   nnz0 = nnz1 = 0;
7797   for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7798     for (PetscInt row = 0, grow = Istart, ncol_row, jj; row < nloc; row++, grow++) {
7799       PetscCall(MatGetRow(c, row, &ncols, &idx, &vals));
7800       for (ncol_row = jj = 0; jj < ncols; jj++, nnz0++) {
7801         PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
7802         if (PetscRealPart(sv) > vfilter) {
7803           nnz1++;
7804           PetscInt cid = idx[jj] + Istart; //diag
7805           if (c != a) cid = garray[idx[jj]];
7806           AA[ncol_row] = vals[jj];
7807           AJ[ncol_row] = cid;
7808           ncol_row++;
7809         }
7810       }
7811       PetscCall(MatRestoreRow(c, row, &ncols, &idx, &vals));
7812       PetscCall(MatSetValues(tGmat, 1, &grow, ncol_row, AJ, AA, INSERT_VALUES));
7813     }
7814   }
7815   PetscCall(PetscFree2(AA, AJ));
7816   PetscCall(MatAssemblyBegin(tGmat, MAT_FINAL_ASSEMBLY));
7817   PetscCall(MatAssemblyEnd(tGmat, MAT_FINAL_ASSEMBLY));
7818   PetscCall(MatPropagateSymmetryOptions(Gmat, tGmat)); /* Normal Mat options are not relevant ? */
7819 
7820   PetscCall(PetscInfo(tGmat, "\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", (!nnz0) ? 1. : 100. * (double)nnz1 / (double)nnz0, (double)vfilter, (!nloc) ? 1. : (double)nnz0 / (double)nloc, MM, (int)maxcols));
7821 
7822   *filteredG = tGmat;
7823   PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view"));
7824   PetscFunctionReturn(0);
7825 }
7826 
7827 /*
7828     Special version for direct calls from Fortran
7829 */
7830 #include <petsc/private/fortranimpl.h>
7831 
7832 /* Change these macros so can be used in void function */
7833 /* Identical to PetscCallVoid, except it assigns to *_ierr */
7834 #undef PetscCall
7835 #define PetscCall(...) \
7836   do { \
7837     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
7838     if (PetscUnlikely(ierr_msv_mpiaij)) { \
7839       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
7840       return; \
7841     } \
7842   } while (0)
7843 
7844 #undef SETERRQ
7845 #define SETERRQ(comm, ierr, ...) \
7846   do { \
7847     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
7848     return; \
7849   } while (0)
7850 
7851 #if defined(PETSC_HAVE_FORTRAN_CAPS)
7852 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
7853 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
7854 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
7855 #else
7856 #endif
/*
  matsetvaluesmpiaij_ - Fortran-callable fast path for MatSetValues() on a MATMPIAIJ matrix.

  All arguments arrive as pointers (Fortran pass-by-reference); the error code is
  returned through *_ierr rather than as a function result, which is why the
  PetscCall/SETERRQ macros were redefined above to assign *_ierr and return void.

  Inserts/adds the m x n dense block v into rows im[] and columns in[] (global,
  0-based). Locally owned entries go directly into the diagonal (A) or
  off-diagonal (B) sequential blocks via the MatSetValues_SeqAIJ_*_Private
  macros; off-process rows are stashed for communication at assembly time.
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) {
  Mat         mat = *mmat;
  PetscInt    m = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  /* INSERT_VALUES and ADD_VALUES may not be mixed between assemblies */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat                    A     = aij->A;
    Mat_SeqAIJ            *a     = (Mat_SeqAIJ *)A->data;
    PetscInt              *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar             *aa;
    PetscBool              ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat                    B                 = aij->B;
    Mat_SeqAIJ            *b                 = (Mat_SeqAIJ *)B->data;
    PetscInt              *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar             *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      /* negative row indices are silently ignored */
      if (im[i] < 0) continue;
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* Locally owned row: set up the binary-search state (rp/ap/rmax/nrow/low/high)
           for both the diagonal (1) and off-diagonal (2) blocks, as required by the
           MatSetValues_SeqAIJ_*_Private macros used below */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          /* pick the value for (im[i], in[j]) from the row- or column-oriented dense block */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* column in the diagonal block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* column in the off-diagonal block: translate the global column to a
               local B column via the colmap when the matrix was previously assembled */
            if (mat->was_assembled) {
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscTableFind(aij->colmap, in[j] + 1, &col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
                /* new off-diagonal column not present in the old pattern: disassemble
                   back to global indexing so the entry can be inserted */
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col      = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B        = aij->B;
                b        = (Mat_SeqAIJ *)B->data;
                bimax    = b->imax;
                bi       = b->i;
                bilen    = b->ilen;
                bj       = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* off-process row: stash the values for communication during MatAssemblyBegin/End */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}
7970 
7971 /* Undefining these here since they were redefined from their original definition above! No
7972  * other PETSc functions should be defined past this point, as it is impossible to recover the
7973  * original definitions */
7974 #undef PetscCall
7975 #undef SETERRQ
7976