xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 2d30e087755efd99e28fdfe792ffbeb2ee1ea928)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
/* Return compressed-row (ia/ja) index arrays for the locally owned rows of an MPIAIJ matrix.
   A merged sequential copy of the local rows is built with MatMPIAIJGetLocalMat() and the
   ia/ja arrays of that copy are returned. */
PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) {
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
  /* compose B on A so it stays alive (the returned ia/ja point into B) until
     MatRestoreRowIJ_MPIAIJ() removes the composed reference */
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
  PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(MatDestroy(&B)); /* drops only the local reference; the composed reference keeps B alive */
  PetscFunctionReturn(0);
}
19 
/* Release the ia/ja arrays obtained with MatGetRowIJ_MPIAIJ().  Retrieves the sequential copy
   composed on A by the Get routine, restores its arrays, then removes the composed reference
   (which destroys the copy). */
PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) {
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B)); /* borrowed reference */
  PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL)); /* drops the last reference to B */
  PetscFunctionReturn(0);
}
29 
30 /*MC
31    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
32 
   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
34    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
35   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
41 
  Developer Note:
  Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`; the type also
  automatically switches over to use inodes when enough exist.
45 
46   Level: beginner
47 
.seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
49 M*/
50 
51 /*MC
52    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
53 
54    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
55    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
56    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
57   for communicators controlling multiple processes.  It is recommended that you call both of
58   the above preallocation routines for simplicity.
59 
60    Options Database Keys:
61 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`
62 
63   Level: beginner
64 
.seealso: `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
66 M*/
67 
68 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg) {
69   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
70 
71   PetscFunctionBegin;
72 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
73   A->boundtocpu = flg;
74 #endif
75   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
76   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
77 
78   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
79    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
80    * to differ from the parent matrix. */
81   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
82   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
83 
84   PetscFunctionReturn(0);
85 }
86 
87 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) {
88   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
89 
90   PetscFunctionBegin;
91   if (mat->A) {
92     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
93     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
94   }
95   PetscFunctionReturn(0);
96 }
97 
/* Build an IS of the locally owned rows that contain at least one stored nonzero value, in
   either the diagonal (A) or off-diagonal (B) block.  If no process has an empty or
   all-explicit-zero row, *keptrows is left NULL so callers can skip any compression step.
   Collective on the matrix communicator (two passes plus an Allreduce). */
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows) {
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data; /* diagonal block */
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data; /* off-diagonal block */
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  /* first pass: count rows that are structurally empty or hold only explicit zeros */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) { /* structurally empty row */
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1; /* found a nonzero: row is kept */
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++; /* every stored value in this row is zero */
  ok1:;
  }
  /* global count of dropped rows; if zero everywhere, return with *keptrows == NULL */
  PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(0);
  }
  /* second pass: collect the global indices of the rows to keep */
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  /* the IS takes ownership of rows (PETSC_OWN_POINTER) */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(0);
}
165 
166 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is) {
167   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
168   PetscBool   cong;
169 
170   PetscFunctionBegin;
171   PetscCall(MatHasCongruentLayouts(Y, &cong));
172   if (Y->assembled && cong) {
173     PetscCall(MatDiagonalSet(aij->A, D, is));
174   } else {
175     PetscCall(MatDiagonalSet_Default(Y, D, is));
176   }
177   PetscFunctionReturn(0);
178 }
179 
180 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows) {
181   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
182   PetscInt    i, rstart, nrows, *rows;
183 
184   PetscFunctionBegin;
185   *zrows = NULL;
186   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
187   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
188   for (i = 0; i < nrows; i++) rows[i] += rstart;
189   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
190   PetscFunctionReturn(0);
191 }
192 
/* Compute a per-column reduction of A (column norms 1/2/inf, or sum/mean of real or imaginary
   parts) into reductions[], which must have global-number-of-columns entries.  Collective:
   local contributions are combined with an Allreduce over the matrix communicator. */
PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions) {
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray; /* garray maps local B columns to global columns */
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work)); /* one accumulator per global column, zero-initialized */
  /* get/restore with no use in between: presumably forces the values onto the host so the
     direct a_aij->a / b_aij->a accesses below are current — NOTE(review): confirm for device backends */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    /* accumulate |a|^2 per column; square roots are taken after the Allreduce */
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  /* infinity norm combines with MAX; every other reduction combines with SUM */
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m; /* mean over the global number of rows */
  }
  PetscFunctionReturn(0);
}
237 
/* Build an IS (global row numbering) of locally owned rows with an entry outside the block
   diagonal: rows reported by the diagonal block's own MatFindOffBlockDiagonalEntries() plus
   any row with a nonzero in the off-process block B. */
PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is) {
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  IS              sis, gis;
  const PetscInt *isis, *igis;
  PetscInt        n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis)); /* local rows flagged inside the diagonal block */
  PetscCall(MatFindNonzeroRows(a->B, &gis));             /* local rows with any off-process entry */
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  /* concatenate the two local-index lists, sort, and drop duplicates */
  PetscCall(PetscMalloc1(ngis + nsis, &iis));
  PetscCall(PetscArraycpy(iis, igis, ngis));
  PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  /* shift to global row numbering; the IS takes ownership of iis (PETSC_OWN_POINTER) */
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(0);
}
267 
/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it it is not scalable (each
process has an order-N integer array) but access is fast.
*/
/* Build aij->colmap, mapping a global column number to (local off-diagonal column index) + 1;
   an entry/result of 0 means the column is not present in B.  The +1 shift is what lets 0
   serve as the "absent" flag in both the table and array representations. */
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n   = aij->B->cmap->n, i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* hash-table variant: memory proportional to the number of off-diagonal columns (scalable) */
  PetscCall(PetscTableCreate(n, mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscTableAdd(aij->colmap, aij->garray[i] + 1, i + 1, INSERT_VALUES));
#else
  /* dense variant: an array over all global columns (fast lookup, not scalable) */
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  PetscCall(PetscLogObjectMemory((PetscObject)mat, (mat->cmap->N + 1) * sizeof(PetscInt)));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(0);
}
291 
/* Insert or add a single (row, col, value) into the diagonal block A.  Uses the per-row search
   state set up by the caller (rp1/ap1 = column/value arrays of the row, nrow1 entries,
   low1/high1/lastcol1 cached between calls); orow/ocol are the original global indices, used
   only in error messages.  Jumps to a_noinsert when the value is dropped (zero off-diagonal
   value with ignorezeroentries, or nonew == 1); errors if a new nonzero appears while
   nonew == -1; otherwise reallocates if needed and shifts later entries up to insert. */
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    /* binary search while the window is large, then a short linear scan */ \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure if LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  }
337 
/* Same as MatSetValues_SeqAIJ_A_Private() but for the off-diagonal block B, using the
   2-suffixed search state (rp2/ap2/nrow2/low2/high2/lastcol2).  The ignorezeroentries test
   has no row != col exception here, since entries routed to B are off the diagonal block. */
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    /* binary search while the window is large, then a short linear scan */ \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  }
382 
/* Overwrite the values of one locally owned row (global index `row`).  v must list the values
   of every stored entry of the row in order of increasing global column index; the row is
   split as: B entries left of the diagonal block, then all A (diagonal block) entries, then
   the remaining B entries.  NOTE(review): the ownership-range start is used as the column
   split point, so this assumes a square matrix (see the comment below). */
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[]) {
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray                         = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag; /* convert global row index to local */
  /* l = number of leading B entries whose global column is left of the diagonal block */
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(0);
}
419 
/* MatSetValues for MPIAIJ.  Locally owned rows are inserted directly into the sequential
   diagonal (A) or off-diagonal (B) blocks through the MatSetValues_SeqAIJ_{A,B}_Private
   macros; rows owned by other processes are put in the stash and communicated during
   assembly.  Negative row/column indices are skipped (the standard "ignore" convention). */
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) {
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A     = aij->A;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B                 = aij->B;
  Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  /* search state for the insertion macros: 1-suffixed for A, 2-suffixed for B */
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative row: ignored */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) { /* locally owned row: insert directly */
      row      = im[i] - rstart;
      /* initialize the per-row search windows for both blocks */
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        /* v may be NULL (insert sparsity only); roworiented selects row- vs column-major v */
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) { /* column in the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) { /* negative column: ignored */
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            /* assembled B uses compacted local column ids: translate via colmap (built lazily) */
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(aij->colmap, in[j] + 1, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
              col   = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              /* new column for B but new nonzeros are disallowed: skip (nonew == 1) or error */
              if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* unassembled B still uses global column ids */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else { /* off-process row: stash for communication during assembly */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(0);
}
527 
/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
533 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[]) {
534   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
535   Mat         A      = aij->A; /* diagonal part of the matrix */
536   Mat         B      = aij->B; /* offdiagonal part of the matrix */
537   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
538   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
539   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
540   PetscInt   *ailen = a->ilen, *aj = a->j;
541   PetscInt   *bilen = b->ilen, *bj = b->j;
542   PetscInt    am          = aij->A->rmap->n, j;
543   PetscInt    diag_so_far = 0, dnz;
544   PetscInt    offd_so_far = 0, onz;
545 
546   PetscFunctionBegin;
547   /* Iterate over all rows of the matrix */
548   for (j = 0; j < am; j++) {
549     dnz = onz = 0;
550     /*  Iterate over all non-zero columns of the current row */
551     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
552       /* If column is in the diagonal */
553       if (mat_j[col] >= cstart && mat_j[col] < cend) {
554         aj[diag_so_far++] = mat_j[col] - cstart;
555         dnz++;
556       } else { /* off-diagonal entries */
557         bj[offd_so_far++] = mat_j[col];
558         onz++;
559       }
560     }
561     ailen[j] = dnz;
562     bilen[j] = onz;
563   }
564   PetscFunctionReturn(0);
565 }
566 
/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
574 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[]) {
575   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
576   Mat          A    = aij->A; /* diagonal part of the matrix */
577   Mat          B    = aij->B; /* offdiagonal part of the matrix */
578   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
579   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
580   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
581   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
582   PetscInt    *ailen = a->ilen, *aj = a->j;
583   PetscInt    *bilen = b->ilen, *bj = b->j;
584   PetscInt     am          = aij->A->rmap->n, j;
585   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
586   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
587   PetscScalar *aa = a->a, *ba = b->a;
588 
589   PetscFunctionBegin;
590   /* Iterate over all rows of the matrix */
591   for (j = 0; j < am; j++) {
592     dnz_row = onz_row = 0;
593     rowstart_offd     = full_offd_i[j];
594     rowstart_diag     = full_diag_i[j];
595     /*  Iterate over all non-zero columns of the current row */
596     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
597       /* If column is in the diagonal */
598       if (mat_j[col] >= cstart && mat_j[col] < cend) {
599         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
600         aa[rowstart_diag + dnz_row] = mat_a[col];
601         dnz_row++;
602       } else { /* off-diagonal entries */
603         bj[rowstart_offd + onz_row] = mat_j[col];
604         ba[rowstart_offd + onz_row] = mat_a[col];
605         onz_row++;
606       }
607     }
608     ailen[j] = dnz_row;
609     bilen[j] = onz_row;
610   }
611   PetscFunctionReturn(0);
612 }
613 
/* Fetch values at (idxm[i], idxn[j]) into the row-major array v.  Only locally owned rows are
   supported (off-process rows error); negative row/column indices are skipped.  Entries not in
   the sparsity pattern come back as 0.0. */
PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* column lies in the diagonal block */
          col = idxn[j] - cstart;
          PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          /* off-diagonal block: translate the global column to a local B column via colmap (built lazily) */
          if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(aij->colmap, idxn[j] + 1, &col));
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0; /* column not present in B */
          else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
647 
648 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode) {
649   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
650   PetscInt    nstash, reallocs;
651 
652   PetscFunctionBegin;
653   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
654 
655   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
656   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
657   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
658   PetscFunctionReturn(0);
659 }
660 
/* Finish assembly: drain the stash of off-process values into the matrix, assemble the A and
   B blocks, keep the disassembled/assembled state consistent across ranks, and build the
   multiply machinery on the first final assembly. */
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode) {
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* each stash message delivers (row, col, val) triples sent by other ranks */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  /* first final assembly: build colmap/garray/scatter machinery for MatMult */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  /* drop row-access work storage and the cached diagonal, which are stale after assembly */
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}
738 
739 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) {
740   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
741 
742   PetscFunctionBegin;
743   PetscCall(MatZeroEntries(l->A));
744   PetscCall(MatZeroEntries(l->B));
745   PetscFunctionReturn(0);
746 }
747 
/*
  Zeroes the rows of A listed (with global indices) in rows[], optionally
  placing diag on the diagonal of each zeroed row and adjusting the
  right-hand side b so the solution at those rows equals the entries of x.

  Collective; also tracks whether any rank changed its nonzero pattern and
  bumps the global nonzerostate accordingly.
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) {
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)A->data;
  PetscObjectState sA, sB;
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    /* b_i = diag * x_i for every zeroed local row i */
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* snapshot nonzero states to detect pattern changes below */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* congruent layouts: each diagonal entry lives in the diagonal block A */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    nnwA, nnwB;
    PetscBool   nnzA, nnzB;

    /* save the blocks' nonew flags; restored after the diagonal insertion loop */
    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      /* rectangular case: skip rows beyond the last column (no diagonal entry) */
      if (row >= A->cmap->N) continue;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}
820 
/*
  Zeroes both the rows and columns of A listed (with global indices) in
  rows[], placing diag on the diagonal of each zeroed row, and adjusts the
  right-hand side b for the eliminated columns (b -= A(:,zeroed) * x).

  Strategy: a PetscSF maps the (possibly off-process) requested rows onto
  their owners; the diagonal block is handled by MatZeroRowsColumns() on
  l->A; for the off-diagonal block a 0/1 mask vector is scattered to ghost
  ordering and used to zero out the matching columns of l->B.
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) {
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n;
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off diagonal part of matrix */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  /* build a 0/1 mask over owned rows, then scatter it into ghost ordering */
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    /* ghost values of x are needed to correct b for the eliminated columns */
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    /* compressed-row storage: only rows with nonzeros are visited; ridx maps back to local row numbers */
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* column *aj is being eliminated: fold its contribution into b, then zero it */
          if (b) bb[*ridx] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}
937 
938 PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy) {
939   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
940   PetscInt    nt;
941   VecScatter  Mvctx = a->Mvctx;
942 
943   PetscFunctionBegin;
944   PetscCall(VecGetLocalSize(xx, &nt));
945   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
946   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
947   PetscUseTypeMethod(a->A, mult, xx, yy);
948   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
949   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
950   PetscFunctionReturn(0);
951 }
952 
953 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) {
954   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
955 
956   PetscFunctionBegin;
957   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
958   PetscFunctionReturn(0);
959 }
960 
961 PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) {
962   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
963   VecScatter  Mvctx = a->Mvctx;
964 
965   PetscFunctionBegin;
966   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
967   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
968   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
969   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
970   PetscFunctionReturn(0);
971 }
972 
973 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) {
974   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
975 
976   PetscFunctionBegin;
977   /* do nondiagonal part */
978   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
979   /* do local part */
980   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
981   /* add partial results together */
982   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
983   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
984   PetscFunctionReturn(0);
985 }
986 
/*
  Tests whether Bmat equals the transpose of Amat to within tol, setting *f
  collectively (same value on all ranks).

  Two phases: a cheap collective test of the diagonal blocks, then (only if
  that passes and size > 1) an expensive test of the off-diagonal couplings
  via MatCreateSubMatrices().
*/
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) {
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij  = (Mat_MPIAIJ *)Amat->data, *Bij;
  Mat         Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme;
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *)Bmat->data;
  Bdia = Bij->A;
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  /* all ranks must agree before proceeding (or returning early) */
  PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  /* NOTE(review): allocation is sized N - last + first but the fill loops below
     write M - last + first entries; this appears to assume M == N — confirm
     whether rectangular matrices can reach this path */
  PetscCall(PetscMalloc1(N - last + first, &notme));
  /* "notme" = all global rows outside this rank's ownership range [first,last) */
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  /* A(Me, Notme) must equal B(Notme, Me)^T for Bmat to be Amat^T */
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(0);
}
1027 
/* A matrix is symmetric (to within tol) exactly when it is its own transpose. */
PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f) {
  PetscFunctionBegin;
  PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f));
  PetscFunctionReturn(0);
}
1033 
1034 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) {
1035   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1036 
1037   PetscFunctionBegin;
1038   /* do nondiagonal part */
1039   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1040   /* do local part */
1041   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1042   /* add partial results together */
1043   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1044   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1045   PetscFunctionReturn(0);
1046 }
1047 
1048 /*
1049   This only works correctly for square matrices where the subblock A->A is the
1050    diagonal block
1051 */
1052 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v) {
1053   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1054 
1055   PetscFunctionBegin;
1056   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1057   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1058   PetscCall(MatGetDiagonal(a->A, v));
1059   PetscFunctionReturn(0);
1060 }
1061 
1062 PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa) {
1063   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1064 
1065   PetscFunctionBegin;
1066   PetscCall(MatScale(a->A, aa));
1067   PetscCall(MatScale(a->B, aa));
1068   PetscFunctionReturn(0);
1069 }
1070 
1071 /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
1072 PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat) {
1073   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1074 
1075   PetscFunctionBegin;
1076   PetscCall(PetscSFDestroy(&aij->coo_sf));
1077   PetscCall(PetscFree(aij->Aperm1));
1078   PetscCall(PetscFree(aij->Bperm1));
1079   PetscCall(PetscFree(aij->Ajmap1));
1080   PetscCall(PetscFree(aij->Bjmap1));
1081 
1082   PetscCall(PetscFree(aij->Aimap2));
1083   PetscCall(PetscFree(aij->Bimap2));
1084   PetscCall(PetscFree(aij->Aperm2));
1085   PetscCall(PetscFree(aij->Bperm2));
1086   PetscCall(PetscFree(aij->Ajmap2));
1087   PetscCall(PetscFree(aij->Bjmap2));
1088 
1089   PetscCall(PetscFree2(aij->sendbuf, aij->recvbuf));
1090   PetscCall(PetscFree(aij->Cperm1));
1091   PetscFunctionReturn(0);
1092 }
1093 
/*
  Destroys a parallel AIJ matrix: frees the stash, the two sequential
  blocks, the column map, the communication infrastructure, the COO
  preallocation data, and finally detaches every composed method so the
  object can be retyped or freed cleanly.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
  /* colmap is either a hash table or a plain array depending on configuration */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
  PetscCall(PetscFree(aij->ld));

  /* Free COO */
  PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));

  /* detach all composed methods; composing NULL removes each entry */
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
  /* NOTE(review): "MatConvert_mpiaij_is_C" is detached a second time here; harmless but redundant */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
  PetscFunctionReturn(0);
}
1167 
1168 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer) {
1169   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1170   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1171   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1172   const PetscInt    *garray = aij->garray;
1173   const PetscScalar *aa, *ba;
1174   PetscInt           header[4], M, N, m, rs, cs, nz, cnt, i, ja, jb;
1175   PetscInt          *rowlens;
1176   PetscInt          *colidxs;
1177   PetscScalar       *matvals;
1178 
1179   PetscFunctionBegin;
1180   PetscCall(PetscViewerSetUp(viewer));
1181 
1182   M  = mat->rmap->N;
1183   N  = mat->cmap->N;
1184   m  = mat->rmap->n;
1185   rs = mat->rmap->rstart;
1186   cs = mat->cmap->rstart;
1187   nz = A->nz + B->nz;
1188 
1189   /* write matrix header */
1190   header[0] = MAT_FILE_CLASSID;
1191   header[1] = M;
1192   header[2] = N;
1193   header[3] = nz;
1194   PetscCallMPI(MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1195   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1196 
1197   /* fill in and store row lengths  */
1198   PetscCall(PetscMalloc1(m, &rowlens));
1199   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1200   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1201   PetscCall(PetscFree(rowlens));
1202 
1203   /* fill in and store column indices */
1204   PetscCall(PetscMalloc1(nz, &colidxs));
1205   for (cnt = 0, i = 0; i < m; i++) {
1206     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1207       if (garray[B->j[jb]] > cs) break;
1208       colidxs[cnt++] = garray[B->j[jb]];
1209     }
1210     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1211     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1212   }
1213   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1214   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1215   PetscCall(PetscFree(colidxs));
1216 
1217   /* fill in and store nonzero values */
1218   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1219   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1220   PetscCall(PetscMalloc1(nz, &matvals));
1221   for (cnt = 0, i = 0; i < m; i++) {
1222     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1223       if (garray[B->j[jb]] > cs) break;
1224       matvals[cnt++] = ba[jb];
1225     }
1226     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1227     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1228   }
1229   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1230   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1231   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1232   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1233   PetscCall(PetscFree(matvals));
1234 
1235   /* write block size option to the viewer's .info file */
1236   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 #include <petscdraw.h>
/*
  Views a parallel AIJ matrix through an ASCII, draw, binary, or socket
  viewer.

  Special ASCII formats (LOAD_BALANCE, INFO_DETAIL, INFO, FACTOR_INFO) and
  binary viewers are handled with early returns; every other case falls
  through to the generic path at the bottom, which gathers the entire
  matrix onto rank 0 and views it there as a sequential matrix.
*/
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) {
  Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt       rank = aij->rank, size = aij->size;
  PetscBool         isdraw, iascii, isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max local nonzero counts across all ranks */
      PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < (PetscInt)size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(0);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank synchronized summary of local sizes, nonzeros, and inode use */
      MatInfo   info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    /* NOTE(review): this branch is unreachable — when iascii is true the first
       `if (iascii)` branch above is taken instead; confirm intent upstream */
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow, iscol;

    /* rank 0 requests all rows/cols; the other ranks request empty index sets */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
    /*  The commented code uses MatCreateSubMatrices instead */
    /*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
    if (rank == 0) {
       PetscCall(PetscObjectReference((PetscObject)AA[0]));
       A    = AA[0];
       Av   = AA[0];
    }
    PetscCall(MatDestroySubMatrices(1,&AA));
*/
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqAIJ(Av, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}
1365 
1366 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) {
1367   PetscBool iascii, isdraw, issocket, isbinary;
1368 
1369   PetscFunctionBegin;
1370   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1371   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1372   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1373   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1374   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1375   PetscFunctionReturn(0);
1376 }
1377 
/*
   MatSOR_MPIAIJ - SOR/relaxation sweeps for an MPIAIJ matrix.

   Only "local" variants are supported in parallel: each sweep relaxes with the
   on-process diagonal block mat->A while the off-process coupling mat->B enters
   only through a modified right-hand side (bb1 = bb - B*x_ghost).  A true
   parallel SOR (updating ghost values within a sweep) is not implemented,
   hence the final SETERRQ.

   Parameters mirror MatSOR(): omega is the relaxation factor, flag selects the
   sweep type, fshift is a diagonal shift, its is the number of outer (parallel)
   iterations, and lits the number of local iterations per outer iteration.
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) {
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec         bb1 = NULL; /* work vector for bb - B*x_ghost; allocated only when needed */
  PetscBool   hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    /* pure triangular application: delegate straight to the diagonal block */
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(0);
  }

  /* bb1 is needed unless we do a single sweep from a zero initial guess;
     note `~flag & SOR_ZERO_INITIAL_GUESS` tests that the bit is NOT set */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep needs no ghost values: x starts at zero so B*x_ghost = 0 */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      /* gather ghost values of the current iterate into mat->lvec */
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    /* Eisenstat's trick: one backward sweep from zero guess ... */
    PetscCall(VecDuplicate(bb, &xx1));
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    if (!mat->diag) {
      /* lazily cache the global diagonal for the diagonal scaling below */
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    /* bb1 = bb + ((omega-2)/omega) * D*xx */
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    /* ... then combine the forward-sweep correction with the backward-sweep result */
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  /* propagate any zero-pivot/factorization error detected in the diagonal block */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}
1473 
/*
   MatPermute_MPIAIJ - forms *B = the permutation of A described by the index sets
   rowp (rows) and colp (columns).

   Strategy: invert the row and column permutations with star forests (PetscSF) so
   each process learns the destination global row/column of every local row/column
   (rdest/cdest, and gcdest for the compressed off-diagonal columns), count
   diagonal/off-diagonal nonzeros per destination row for preallocation, then
   insert the permuted entries with MatSetValues().
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B) {
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL; /* NOTE(review): never set in this routine, so the ISDestroy(&colp) at the end is currently dead code */
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  /* work is shared scratch for both row and column passes, hence max(m,n) */
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols (global columns of the compressed off-diagonal block) should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* Count diagonal (dnnz) and off-diagonal (onnz) nonzeros each source row will
     contribute at its destination; tdnnz/tonnz receive the counts at the owner
     of the destination row */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(0);
}
1578 
1579 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) {
1580   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1581 
1582   PetscFunctionBegin;
1583   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1584   if (ghosts) *ghosts = aij->garray;
1585   PetscFunctionReturn(0);
1586 }
1587 
1588 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) {
1589   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1590   Mat            A = mat->A, B = mat->B;
1591   PetscLogDouble isend[5], irecv[5];
1592 
1593   PetscFunctionBegin;
1594   info->block_size = 1.0;
1595   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1596 
1597   isend[0] = info->nz_used;
1598   isend[1] = info->nz_allocated;
1599   isend[2] = info->nz_unneeded;
1600   isend[3] = info->memory;
1601   isend[4] = info->mallocs;
1602 
1603   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1604 
1605   isend[0] += info->nz_used;
1606   isend[1] += info->nz_allocated;
1607   isend[2] += info->nz_unneeded;
1608   isend[3] += info->memory;
1609   isend[4] += info->mallocs;
1610   if (flag == MAT_LOCAL) {
1611     info->nz_used      = isend[0];
1612     info->nz_allocated = isend[1];
1613     info->nz_unneeded  = isend[2];
1614     info->memory       = isend[3];
1615     info->mallocs      = isend[4];
1616   } else if (flag == MAT_GLOBAL_MAX) {
1617     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1618 
1619     info->nz_used      = irecv[0];
1620     info->nz_allocated = irecv[1];
1621     info->nz_unneeded  = irecv[2];
1622     info->memory       = irecv[3];
1623     info->mallocs      = irecv[4];
1624   } else if (flag == MAT_GLOBAL_SUM) {
1625     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1626 
1627     info->nz_used      = irecv[0];
1628     info->nz_allocated = irecv[1];
1629     info->nz_unneeded  = irecv[2];
1630     info->memory       = irecv[3];
1631     info->mallocs      = irecv[4];
1632   }
1633   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1634   info->fill_ratio_needed = 0;
1635   info->factor_mallocs    = 0;
1636   PetscFunctionReturn(0);
1637 }
1638 
/*
   MatSetOption_MPIAIJ - handles MatSetOption() for MPIAIJ matrices.

   Most options are forwarded to both sequential blocks (a->A diagonal,
   a->B off-diagonal); a few are recorded in the Mat_MPIAIJ struct, ignored,
   or handled entirely by the generic MatSetOption() caller.
*/
PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* options that must reach both sequential blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    /* remember the orientation locally (used by MatSetValues) and forward it */
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); break;
  case MAT_IGNORE_OFF_PROC_ENTRIES: a->donotstash = flg; break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  case MAT_SUBMAT_SINGLEIS: A->submat_singleis = flg; break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(0);
}
1684 
/*
   MatGetRow_MPIAIJ - returns one (globally numbered) local row of the matrix,
   merging the diagonal (A) and off-diagonal (B) block entries into a single
   list sorted by increasing global column index.

   Only rows owned by this process may be requested.  The returned idx/v arrays
   are internal buffers (mat->rowindices/mat->rowvalues) valid until
   MatRestoreRow() is called; only one row may be "gotten" at a time.
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
  PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    max = 1, tmp;
    for (i = 0; i < matin->rmap->n; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart; /* local row index within the diagonal/off-diagonal blocks */

  /* request only the pieces the caller asked for; note that when values are
     wanted the B column indices are still needed to interleave correctly */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  /* cmap maps compressed off-diagonal column indices to global columns */
  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column lies before the diagonal block */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
        } else {
          /* imark was not computed above (no values requested); find it here */
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(0);
}
1767 
1768 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
1769   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1770 
1771   PetscFunctionBegin;
1772   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1773   aij->getrowactive = PETSC_FALSE;
1774   PetscFunctionReturn(0);
1775 }
1776 
/*
   MatNorm_MPIAIJ - computes the Frobenius, 1- (max column sum) or infinity-
   (max row sum) norm of a parallel AIJ matrix.

   On one process the computation is delegated to the sequential block;
   otherwise the raw CSR arrays of the diagonal (A) and off-diagonal (B)
   blocks are walked directly and the partial results reduced over the
   communicator.  The 2-norm is not supported.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) {
  Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
  PetscInt         i, j, cstart = mat->cmap->rstart;
  PetscReal        sum = 0.0;
  const MatScalar *v, *amata, *bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    PetscCall(MatNorm(aij->A, type, norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both blocks, reduce, then take the square root */
      v = amata;
      for (i = 0; i < amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v = bmata;
      for (i = 0; i < bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      /* accumulate |a_ij| per GLOBAL column (tmp has global length), reduce, take max */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = aij->garray;
      PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
      *norm = 0.0;
      v     = amata;
      jj    = amat->j;
      for (j = 0; j < amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); /* diagonal-block columns are offset by cstart */
        v++;
      }
      v  = bmata;
      jj = bmat->j;
      for (j = 0; j < bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); /* garray maps compressed to global columns */
        v++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are not split across processes, so a local row-sum max + MPI_MAX suffices */
      PetscReal ntemp = 0.0;
      for (j = 0; j < aij->A->rmap->n; j++) {
        v   = amata + amat->i[j];
        sum = 0.0;
        for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        v = bmata + bmat->i[j];
        for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
  }
  PetscFunctionReturn(0);
}
1853 
/*
   MatTranspose_MPIAIJ - forms the explicit transpose of a parallel AIJ matrix.

   The diagonal block is transposed locally (fast path via MatTranspose on a
   precursor-prepared SeqAIJ), while the off-diagonal block is transposed by
   inserting its entries with MatSetValues() into the new matrix, since those
   destinations live on other processes.  Preallocation counts for the new
   matrix are obtained by histogramming column indices and reducing the
   off-diagonal contributions through a PetscSF over the column layout.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) {
  Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
  Mat              B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  ma = A->rmap->n;
  na = A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    /* must create (and preallocate) the result matrix */
    PetscInt            *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode         *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    /* transpose has swapped sizes/block sizes relative to A */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    B = *matout;
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ *)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  /* translate compressed off-diagonal columns to global columns */
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    ncol = bi[i + 1] - bi[i];
    /* insert row i of B as column `row` of the transpose (ncol rows, one column) */
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    pbv += ncol;
    cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX: replace A's content with the transpose */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(0);
}
1946 
1947 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) {
1948   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1949   Mat         a = aij->A, b = aij->B;
1950   PetscInt    s1, s2, s3;
1951 
1952   PetscFunctionBegin;
1953   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1954   if (rr) {
1955     PetscCall(VecGetLocalSize(rr, &s1));
1956     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1957     /* Overlap communication with computation. */
1958     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1959   }
1960   if (ll) {
1961     PetscCall(VecGetLocalSize(ll, &s1));
1962     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1963     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1964   }
1965   /* scale  the diagonal block */
1966   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1967 
1968   if (rr) {
1969     /* Do a scatter end and then right scale the off-diagonal block */
1970     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1971     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
1972   }
1973   PetscFunctionReturn(0);
1974 }
1975 
1976 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) {
1977   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1978 
1979   PetscFunctionBegin;
1980   PetscCall(MatSetUnfactored(a->A));
1981   PetscFunctionReturn(0);
1982 }
1983 
1984 PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) {
1985   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
1986   Mat         a, b, c, d;
1987   PetscBool   flg;
1988 
1989   PetscFunctionBegin;
1990   a = matA->A;
1991   b = matA->B;
1992   c = matB->A;
1993   d = matB->B;
1994 
1995   PetscCall(MatEqual(a, c, &flg));
1996   if (flg) PetscCall(MatEqual(b, d, &flg));
1997   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1998   PetscFunctionReturn(0);
1999 }
2000 
2001 PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) {
2002   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2003   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2004 
2005   PetscFunctionBegin;
2006   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2007   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2008     /* because of the column compression in the off-processor part of the matrix a->B,
2009        the number of columns in a->B and b->B may be different, hence we cannot call
2010        the MatCopy() directly on the two parts. If need be, we can provide a more
2011        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2012        then copying the submatrices */
2013     PetscCall(MatCopy_Basic(A, B, str));
2014   } else {
2015     PetscCall(MatCopy(a->A, b->A, str));
2016     PetscCall(MatCopy(a->B, b->B, str));
2017   }
2018   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2019   PetscFunctionReturn(0);
2020 }
2021 
/*
   MatSetUp_MPIAIJ - default setup: preallocates with PETSC_DEFAULT row lengths
   so the matrix is usable when the caller never preallocated explicitly.
*/
PetscErrorCode MatSetUp_MPIAIJ(Mat A) {
  PetscFunctionBegin;
  PetscCall(MatMPIAIJSetPreallocation(A, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL));
  PetscFunctionReturn(0);
}
2027 
2028 /*
2029    Computes the number of nonzeros per row needed for preallocation when X and Y
2030    have different nonzero structure.
2031 */
2032 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) {
2033   PetscInt i, j, k, nzx, nzy;
2034 
2035   PetscFunctionBegin;
2036   /* Set the number of nonzeros in the new matrix */
2037   for (i = 0; i < m; i++) {
2038     const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i];
2039     nzx    = xi[i + 1] - xi[i];
2040     nzy    = yi[i + 1] - yi[i];
2041     nnz[i] = 0;
2042     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2043       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2044       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2045       nnz[i]++;
2046     }
2047     for (; k < nzy; k++) nnz[i]++;
2048   }
2049   PetscFunctionReturn(0);
2050 }
2051 
2052 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2053 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) {
2054   PetscInt    m = Y->rmap->N;
2055   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2056   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2057 
2058   PetscFunctionBegin;
2059   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2060   PetscFunctionReturn(0);
2061 }
2062 
2063 PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) {
2064   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;
2065 
2066   PetscFunctionBegin;
2067   if (str == SAME_NONZERO_PATTERN) {
2068     PetscCall(MatAXPY(yy->A, a, xx->A, str));
2069     PetscCall(MatAXPY(yy->B, a, xx->B, str));
2070   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2071     PetscCall(MatAXPY_Basic(Y, a, X, str));
2072   } else {
2073     Mat       B;
2074     PetscInt *nnz_d, *nnz_o;
2075 
2076     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
2077     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
2078     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2079     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2080     PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
2081     PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
2082     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
2083     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
2084     PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
2085     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2086     PetscCall(MatHeaderMerge(Y, &B));
2087     PetscCall(PetscFree(nnz_d));
2088     PetscCall(PetscFree(nnz_o));
2089   }
2090   PetscFunctionReturn(0);
2091 }
2092 
2093 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2094 
2095 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) {
2096   PetscFunctionBegin;
2097   if (PetscDefined(USE_COMPLEX)) {
2098     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2099 
2100     PetscCall(MatConjugate_SeqAIJ(aij->A));
2101     PetscCall(MatConjugate_SeqAIJ(aij->B));
2102   }
2103   PetscFunctionReturn(0);
2104 }
2105 
2106 PetscErrorCode MatRealPart_MPIAIJ(Mat A) {
2107   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2108 
2109   PetscFunctionBegin;
2110   PetscCall(MatRealPart(a->A));
2111   PetscCall(MatRealPart(a->B));
2112   PetscFunctionReturn(0);
2113 }
2114 
2115 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) {
2116   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2117 
2118   PetscFunctionBegin;
2119   PetscCall(MatImaginaryPart(a->A));
2120   PetscCall(MatImaginaryPart(a->B));
2121   PetscFunctionReturn(0);
2122 }
2123 
2124 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) {
2125   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2126   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2127   PetscScalar       *va, *vv;
2128   Vec                vB, vA;
2129   const PetscScalar *vb;
2130 
2131   PetscFunctionBegin;
2132   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
2133   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2134 
2135   PetscCall(VecGetArrayWrite(vA, &va));
2136   if (idx) {
2137     for (i = 0; i < m; i++) {
2138       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2139     }
2140   }
2141 
2142   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
2143   PetscCall(PetscMalloc1(m, &idxb));
2144   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2145 
2146   PetscCall(VecGetArrayWrite(v, &vv));
2147   PetscCall(VecGetArrayRead(vB, &vb));
2148   for (i = 0; i < m; i++) {
2149     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2150       vv[i] = vb[i];
2151       if (idx) idx[i] = a->garray[idxb[i]];
2152     } else {
2153       vv[i] = va[i];
2154       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2155     }
2156   }
2157   PetscCall(VecRestoreArrayWrite(vA, &vv));
2158   PetscCall(VecRestoreArrayWrite(vA, &va));
2159   PetscCall(VecRestoreArrayRead(vB, &vb));
2160   PetscCall(PetscFree(idxb));
2161   PetscCall(VecDestroy(&vA));
2162   PetscCall(VecDestroy(&vB));
2163   PetscFunctionReturn(0);
2164 }
2165 
/*
  MatGetRowMinAbs_MPIAIJ - For each local row, finds the entry of smallest absolute
  value (implicit zeros of the compressed off-diagonal block included) and
  optionally its global column index.

  The minimum is taken over the diagonal block (mat->A) and the off-diagonal block
  (mat->B). Because B stores only a compressed set of columns, a B row that is not
  dense contains at least one implicit 0.0 whose global column must be located
  explicitly.

  Input:  A   - the MATMPIAIJ matrix
  Output: v   - vector of local length m receiving the per-row minimum magnitudes
          idx - optional array (length m) of global column indices, or NULL
*/
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) {
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* compressed B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything is in the diagonal block: delegate directly, writing into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* this process owns no columns: every local row is empty here */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = 0.0;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so it has an implicit 0.0, which no stored entry can beat in magnitude */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): this compares global column numbers against the loop index j and
         cstart directly; it appears to assume cmap is the identity outside holes — confirm */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the stored entries of this B row for a smaller magnitude than the current candidate */
    for (j = 0; j < ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* combine the diagonal-block and off-diagonal minima; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2280 
/*
  MatGetRowMin_MPIAIJ - For each local row, finds the entry of smallest real part
  (implicit zeros of the compressed off-diagonal block included) and optionally
  its global column index.

  The minimum is taken over the diagonal block (mat->A) and the off-diagonal block
  (mat->B). Because B stores only a compressed set of columns, a B row that is not
  dense contains at least one implicit 0.0 whose global column must be located
  explicitly.

  Input:  A   - the MATMPIAIJ matrix
  Output: v   - vector of local length m receiving the per-row minima
          idx - optional array (length m) of global column indices, or NULL
*/
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) {
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* compressed B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything is in the diagonal block: delegate directly, writing into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* this process owns no columns: the minimum over an empty row is +infinity */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so it has an implicit 0.0: the row minimum is 0.0 or lower */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): this compares global column numbers against the loop index j and
         cstart directly; it appears to assume cmap is the identity outside holes — confirm */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the stored entries of this B row for a value smaller than the current candidate */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* combine the diagonal-block and off-diagonal minima; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2395 
/*
  MatGetRowMax_MPIAIJ - For each local row, finds the entry of largest real part
  (implicit zeros of the compressed off-diagonal block included) and optionally
  its global column index.

  The maximum is taken over the diagonal block (mat->A) and the off-diagonal block
  (mat->B). Because B stores only a compressed set of columns, a B row that is not
  dense contains at least one implicit 0.0 whose global column must be located
  explicitly.

  Input:  A   - the MATMPIAIJ matrix
  Output: v   - vector of local length m receiving the per-row maxima
          idx - optional array (length m) of global column indices, or NULL
*/
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) {
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* compressed B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything is in the diagonal block: delegate directly, writing into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* this process owns no columns: the maximum over an empty row is -infinity */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): this compares global column numbers against the loop index j and
         cstart directly; it appears to assume cmap is the identity outside holes — confirm */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the stored entries of this B row for a value larger than the current candidate */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* combine the diagonal-block and off-diagonal maxima; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2510 
2511 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) {
2512   Mat *dummy;
2513 
2514   PetscFunctionBegin;
2515   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2516   *newmat = *dummy;
2517   PetscCall(PetscFree(dummy));
2518   PetscFunctionReturn(0);
2519 }
2520 
2521 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) {
2522   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2523 
2524   PetscFunctionBegin;
2525   PetscCall(MatInvertBlockDiagonal(a->A, values));
2526   A->factorerrortype = a->A->factorerrortype;
2527   PetscFunctionReturn(0);
2528 }
2529 
2530 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) {
2531   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2532 
2533   PetscFunctionBegin;
2534   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2535   PetscCall(MatSetRandom(aij->A, rctx));
2536   if (x->assembled) {
2537     PetscCall(MatSetRandom(aij->B, rctx));
2538   } else {
2539     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2540   }
2541   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2542   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2543   PetscFunctionReturn(0);
2544 }
2545 
2546 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) {
2547   PetscFunctionBegin;
2548   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2549   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2550   PetscFunctionReturn(0);
2551 }
2552 
2553 /*@
2554    MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2555 
2556    Not collective
2557 
2558    Input Parameter:
2559 .    A - the matrix
2560 
2561    Output Parameter:
2562 .    nz - the number of nonzeros
2563 
2564  Level: advanced
2565 
2566 .seealso: `MATMPIAIJ`, `Mat`
2567 @*/
2568 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) {
2569   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2570   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2571 
2572   PetscFunctionBegin;
2573   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2574   PetscFunctionReturn(0);
2575 }
2576 
/*@
   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

   Collective on A

   Input Parameters:
+    A - the matrix
-    sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)

 Level: advanced

.seealso: `MATMPIAIJ`, `MatIncreaseOverlap()`
@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) {
  PetscFunctionBegin;
  /* no-op for matrix types that do not provide the method */
  PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
  PetscFunctionReturn(0);
}
2594 
2595 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject) {
2596   PetscBool sc = PETSC_FALSE, flg;
2597 
2598   PetscFunctionBegin;
2599   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2600   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2601   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2602   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2603   PetscOptionsHeadEnd();
2604   PetscFunctionReturn(0);
2605 }
2606 
2607 PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) {
2608   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
2609   Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;
2610 
2611   PetscFunctionBegin;
2612   if (!Y->preallocated) {
2613     PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
2614   } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
2615     PetscInt nonew = aij->nonew;
2616     PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
2617     aij->nonew = nonew;
2618   }
2619   PetscCall(MatShift_Basic(Y, a));
2620   PetscFunctionReturn(0);
2621 }
2622 
2623 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) {
2624   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2625 
2626   PetscFunctionBegin;
2627   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2628   PetscCall(MatMissingDiagonal(a->A, missing, d));
2629   if (d) {
2630     PetscInt rstart;
2631     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2632     *d += rstart;
2633   }
2634   PetscFunctionReturn(0);
2635 }
2636 
2637 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) {
2638   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2639 
2640   PetscFunctionBegin;
2641   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2642   PetscFunctionReturn(0);
2643 }
2644 
/* -------------------------------------------------------------------*/
/* Function dispatch table for MATMPIAIJ. Entries are positional: the /*NN*/
   /* comments mark the slot index in struct _MatOps; NULL means the operation
   is unsupported or supplied later (e.g. by MatConvert or plugin registration).
   Do NOT reorder entries — slot position defines the operation. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       MatFilter_AIJ,
                                       /*150*/ NULL};
2797 
2798 /* ----------------------------------------------------------------------------------------*/
2799 
2800 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) {
2801   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2802 
2803   PetscFunctionBegin;
2804   PetscCall(MatStoreValues(aij->A));
2805   PetscCall(MatStoreValues(aij->B));
2806   PetscFunctionReturn(0);
2807 }
2808 
2809 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) {
2810   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2811 
2812   PetscFunctionBegin;
2813   PetscCall(MatRetrieveValues(aij->A));
2814   PetscCall(MatRetrieveValues(aij->B));
2815   PetscFunctionReturn(0);
2816 }
2817 
/*
  MatMPIAIJSetPreallocation_MPIAIJ - Preallocates storage for the diagonal (d_nz/d_nnz)
  and off-diagonal (o_nz/o_nnz) sequential blocks of a MATMPIAIJ matrix.

  Discards any existing communication structures (colmap, garray, lvec, Mvctx) and the
  off-diagonal block, since a change in preallocation invalidates them; the diagonal
  block is created only on the first call.
*/
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) {
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ *)B->data;

  /* drop the global-to-local column map; it will be rebuilt at assembly */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  /* on a single process there is no off-diagonal part, so give it zero columns */
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));
  PetscCall(PetscLogObjectParent((PetscObject)B, (PetscObject)b->B));

  if (!B->preallocated) {
    /* first preallocation: the diagonal block does not exist yet */
    PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
    PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
    PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
    PetscCall(MatSetType(b->A, MATSEQAIJ));
    PetscCall(PetscLogObjectParent((PetscObject)B, (PetscObject)b->A));
  }

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2860 
2861 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) {
2862   Mat_MPIAIJ *b;
2863 
2864   PetscFunctionBegin;
2865   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2866   PetscCall(PetscLayoutSetUp(B->rmap));
2867   PetscCall(PetscLayoutSetUp(B->cmap));
2868   b = (Mat_MPIAIJ *)B->data;
2869 
2870 #if defined(PETSC_USE_CTABLE)
2871   PetscCall(PetscTableDestroy(&b->colmap));
2872 #else
2873   PetscCall(PetscFree(b->colmap));
2874 #endif
2875   PetscCall(PetscFree(b->garray));
2876   PetscCall(VecDestroy(&b->lvec));
2877   PetscCall(VecScatterDestroy(&b->Mvctx));
2878 
2879   PetscCall(MatResetPreallocation(b->A));
2880   PetscCall(MatResetPreallocation(b->B));
2881   B->preallocated  = PETSC_TRUE;
2882   B->was_assembled = PETSC_FALSE;
2883   B->assembled     = PETSC_FALSE;
2884   PetscFunctionReturn(0);
2885 }
2886 
/*
   Duplicates an MPIAIJ matrix: creates a new parallel matrix with the same
   type and layout, copies the internal bookkeeping (colmap, garray, lvec,
   Mvctx) when present, and duplicates the diagonal (A) and off-diagonal (B)
   sequential blocks. cpvalues controls whether numerical values are copied.
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) {
  Mat         mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* MatGetRow() scratch space is not copied; it is allocated lazily on first use */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  /* share (reference-count) the layouts rather than copying them */
  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  /* copy the global-to-local column map, if the source has one */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableCreateCopy(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
    PetscCall(PetscLogObjectMemory((PetscObject)mat, (mat->cmap->N) * sizeof(PetscInt)));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
  } else a->colmap = NULL;
  /* copy the off-diagonal column index map (garray has B->cmap->n entries) */
  if (oldmat->garray) {
    PetscInt len;
    len = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len + 1, &a->garray));
    PetscCall(PetscLogObjectMemory((PetscObject)mat, len * sizeof(PetscInt)));
    if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) {
    PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
    PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->lvec));
  }
  if (oldmat->Mvctx) {
    PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
    PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->Mvctx));
  }
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->B));
  /* carry over composed functions (e.g. conversion/query routines) to the duplicate */
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}
2951 
2952 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) {
2953   PetscBool isbinary, ishdf5;
2954 
2955   PetscFunctionBegin;
2956   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
2957   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
2958   /* force binary viewer to load .info file if it has not yet done so */
2959   PetscCall(PetscViewerSetUp(viewer));
2960   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2961   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
2962   if (isbinary) {
2963     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
2964   } else if (ishdf5) {
2965 #if defined(PETSC_HAVE_HDF5)
2966     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
2967 #else
2968     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2969 #endif
2970   } else {
2971     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
2972   }
2973   PetscFunctionReturn(0);
2974 }
2975 
/*
   Loads an MPIAIJ matrix from a binary viewer. The file holds a header
   (classid, M, N, nz) followed by per-row lengths, column indices, and values.
   All PetscViewerBinaryReadAll() calls are collective and must stay in this
   exact order on every process.
*/
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) {
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  /* nz < 0 marks special on-disk formats (e.g. dense) that this loader cannot handle */
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  /* prefix-sum the per-row lengths in place to obtain local CSR row offsets */
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  /* sanity check: the local row sums must add up to the header's nonzero count */
  PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(0);
}
3024 
3025 /* Not scalable because of ISAllGather() unless getting all columns. */
3026 PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) {
3027   IS          iscol_local;
3028   PetscBool   isstride;
3029   PetscMPIInt lisstride = 0, gisstride;
3030 
3031   PetscFunctionBegin;
3032   /* check if we are grabbing all columns*/
3033   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3034 
3035   if (isstride) {
3036     PetscInt start, len, mstart, mlen;
3037     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3038     PetscCall(ISGetLocalSize(iscol, &len));
3039     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3040     if (mstart == start && mlen - mstart == len) lisstride = 1;
3041   }
3042 
3043   PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3044   if (gisstride) {
3045     PetscInt N;
3046     PetscCall(MatGetSize(mat, NULL, &N));
3047     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3048     PetscCall(ISSetIdentity(iscol_local));
3049     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3050   } else {
3051     PetscInt cbs;
3052     PetscCall(ISGetBlockSize(iscol, &cbs));
3053     PetscCall(ISAllGather(iscol, &iscol_local));
3054     PetscCall(ISSetBlockSize(iscol_local, cbs));
3055   }
3056 
3057   *isseq = iscol_local;
3058   PetscFunctionReturn(0);
3059 }
3060 
3061 /*
3062  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3063  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3064 
3065  Input Parameters:
3066    mat - matrix
   isrow - parallel row index set; its local indices are a subset of local rows of mat,
3068            i.e., mat->rstart <= isrow[i] < mat->rend
3069    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3070            i.e., mat->cstart <= iscol[i] < mat->cend
3071  Output Parameter:
3072    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3073    iscol_o - sequential column index set for retrieving mat->B
3074    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3075  */
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[]) {
  Vec             x, cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray, *cmaparray;
  PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
  Mat             B    = a->B;
  Vec             lvec = a->lvec, lcmap;
  PetscInt        i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm        comm;
  VecScatter      Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  /* x marks selected columns with their global index; cmap marks them with their
     position inside the submatrix; -1 marks unselected columns in both */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0));
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices */
  /* exclusive prefix sum: isstart = number of selected columns on lower-rank processes */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
    idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d */
  /* idx ownership transfers to the IS (PETSC_OWN_POINTER); do not free it here */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i));
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d */
  /* idx is re-allocated here; ownership again transfers to the created IS */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  /* entries > -1 in the scattered lvec mark off-process columns selected by iscol */
  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  for (i = 0; i < Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  /* COPY_VALUES here because idx is larger (Bn) than the count entries actually used */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  /* cmap1 ownership passes to the caller via *garray (caller must PetscFree it) */
  *garray = cmap1;

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(0);
}
3171 
/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat         M = NULL;
  MPI_Comm    comm;
  IS          iscol_d, isrow_d, iscol_o;
  Mat         Asub = NULL, Bsub = NULL;
  PetscInt    n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    /* these index sets were composed onto *submat by the MAT_INITIAL_MATRIX call below */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));

    /* Create submatrix M */
    /* takes ownership of Asub and Bsub; Bsub is destroyed inside */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ *)M->data;

    PetscCall(ISGetLocalSize(iscol_o, &BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt        i, j, *idx_new, *subgarray = asub->garray;
      PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));

      /* march subgarray (condensed, sorted) against garray (sorted) to keep only
         the iscol_o entries whose columns survived the compression */
      PetscCall(PetscMalloc1(n, &idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o, &idx));
      for (i = 0; i < n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o, &idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    /* composing bumps the reference count, so local references can be dropped */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(0);
}
3263 
/*
   Dispatcher for extracting a parallel submatrix mat[isrow, iscol]. Chooses
   among three implementations, from cheapest to most general:
     1. SameRowColDist - both isrow and iscol match mat's distribution
     2. SameRowDist    - only isrow matches mat's distribution
     3. nonscalable    - general case via ISAllGather (hash-table based)
   All branch decisions are made collectively (MPIU_Allreduce) so every
   process takes the same path.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) {
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* the initial call composed marker objects on *newmat; query them to
       recover which implementation produced the matrix being reused */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      /* all local indices fall within this process's row ownership range */
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol, &i, &j));
      PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* logical AND across all processes: LAND ensures every rank agrees */
    PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
    PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
      PetscFunctionReturn(0);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
        PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol, &i));
        PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);

        PetscCall(ISSorted(iscol_local, &sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
          PetscFunctionReturn(0);
        }
        /* unsorted iscol_local falls through to the general case below */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
          PetscFunctionReturn(0);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    /* iscol_local may already have been built by the sameRowDist branch above */
    if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
  }

  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* stash iscol_local on the result so a MAT_REUSE_MATRIX call can find it */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(0);
}
3362 
3363 /*@C
3364      MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3365          and "off-diagonal" part of the matrix in CSR format.
3366 
3367    Collective
3368 
3369    Input Parameters:
3370 +  comm - MPI communicator
3371 .  A - "diagonal" portion of matrix
3372 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3373 -  garray - global index of B columns
3374 
3375    Output Parameter:
3376 .   mat - the matrix, with input A as its local diagonal matrix
3377    Level: advanced
3378 
3379    Notes:
3380    See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3381 
3382    A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3383 
3384 .seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3385 @*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat) {
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat */
  PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat */
  /* A's reference is taken over directly; the caller must not use A afterwards */
  maij->A = A;

  /* translate B's local column indices to global indices, in place via garray */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat */
  /* Bnew aliases B's i/j/a arrays rather than copying them */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* hand ownership of the shared arrays from B to Bnew before destroying B,
     so MatDestroy(&B) does not free memory Bnew still uses */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}
3455 
3456 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3457 
3458 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) {
3459   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
3460   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3461   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
3462   Mat             M, Msub, B = a->B;
3463   MatScalar      *aa;
3464   Mat_SeqAIJ     *aij;
3465   PetscInt       *garray = a->garray, *colsub, Ncols;
3466   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
3467   IS              iscol_sub, iscmap;
3468   const PetscInt *is_idx, *cmap;
3469   PetscBool       allcolumns = PETSC_FALSE;
3470   MPI_Comm        comm;
3471 
3472   PetscFunctionBegin;
3473   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3474   if (call == MAT_REUSE_MATRIX) {
3475     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3476     PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
3477     PetscCall(ISGetLocalSize(iscol_sub, &count));
3478 
3479     PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
3480     PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");
3481 
3482     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
3483     PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3484 
3485     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
3486 
3487   } else { /* call == MAT_INITIAL_MATRIX) */
3488     PetscBool flg;
3489 
3490     PetscCall(ISGetLocalSize(iscol, &n));
3491     PetscCall(ISGetSize(iscol, &Ncols));
3492 
3493     /* (1) iscol -> nonscalable iscol_local */
3494     /* Check for special case: each processor gets entire matrix columns */
3495     PetscCall(ISIdentity(iscol_local, &flg));
3496     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3497     PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3498     if (allcolumns) {
3499       iscol_sub = iscol_local;
3500       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3501       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
3502 
3503     } else {
3504       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3505       PetscInt *idx, *cmap1, k;
3506       PetscCall(PetscMalloc1(Ncols, &idx));
3507       PetscCall(PetscMalloc1(Ncols, &cmap1));
3508       PetscCall(ISGetIndices(iscol_local, &is_idx));
3509       count = 0;
3510       k     = 0;
3511       for (i = 0; i < Ncols; i++) {
3512         j = is_idx[i];
3513         if (j >= cstart && j < cend) {
3514           /* diagonal part of mat */
3515           idx[count]     = j;
3516           cmap1[count++] = i; /* column index in submat */
3517         } else if (Bn) {
3518           /* off-diagonal part of mat */
3519           if (j == garray[k]) {
3520             idx[count]     = j;
3521             cmap1[count++] = i; /* column index in submat */
3522           } else if (j > garray[k]) {
3523             while (j > garray[k] && k < Bn - 1) k++;
3524             if (j == garray[k]) {
3525               idx[count]     = j;
3526               cmap1[count++] = i; /* column index in submat */
3527             }
3528           }
3529         }
3530       }
3531       PetscCall(ISRestoreIndices(iscol_local, &is_idx));
3532 
3533       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
3534       PetscCall(ISGetBlockSize(iscol, &cbs));
3535       PetscCall(ISSetBlockSize(iscol_sub, cbs));
3536 
3537       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
3538     }
3539 
3540     /* (3) Create sequential Msub */
3541     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
3542   }
3543 
3544   PetscCall(ISGetLocalSize(iscol_sub, &count));
3545   aij = (Mat_SeqAIJ *)(Msub)->data;
3546   ii  = aij->i;
3547   PetscCall(ISGetIndices(iscmap, &cmap));
3548 
3549   /*
3550       m - number of local rows
3551       Ncols - number of columns (same on all processors)
3552       rstart - first row in new global matrix generated
3553   */
3554   PetscCall(MatGetSize(Msub, &m, NULL));
3555 
3556   if (call == MAT_INITIAL_MATRIX) {
3557     /* (4) Create parallel newmat */
3558     PetscMPIInt rank, size;
3559     PetscInt    csize;
3560 
3561     PetscCallMPI(MPI_Comm_size(comm, &size));
3562     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3563 
3564     /*
3565         Determine the number of non-zeros in the diagonal and off-diagonal
3566         portions of the matrix in order to do correct preallocation
3567     */
3568 
3569     /* first get start and end of "diagonal" columns */
3570     PetscCall(ISGetLocalSize(iscol, &csize));
3571     if (csize == PETSC_DECIDE) {
3572       PetscCall(ISGetSize(isrow, &mglobal));
3573       if (mglobal == Ncols) { /* square matrix */
3574         nlocal = m;
3575       } else {
3576         nlocal = Ncols / size + ((Ncols % size) > rank);
3577       }
3578     } else {
3579       nlocal = csize;
3580     }
3581     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3582     rstart = rend - nlocal;
3583     PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);
3584 
3585     /* next, compute all the lengths */
3586     jj = aij->j;
3587     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3588     olens = dlens + m;
3589     for (i = 0; i < m; i++) {
3590       jend = ii[i + 1] - ii[i];
3591       olen = 0;
3592       dlen = 0;
3593       for (j = 0; j < jend; j++) {
3594         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3595         else dlen++;
3596         jj++;
3597       }
3598       olens[i] = olen;
3599       dlens[i] = dlen;
3600     }
3601 
3602     PetscCall(ISGetBlockSize(isrow, &bs));
3603     PetscCall(ISGetBlockSize(iscol, &cbs));
3604 
3605     PetscCall(MatCreate(comm, &M));
3606     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
3607     PetscCall(MatSetBlockSizes(M, bs, cbs));
3608     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3609     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3610     PetscCall(PetscFree(dlens));
3611 
3612   } else { /* call == MAT_REUSE_MATRIX */
3613     M = *newmat;
3614     PetscCall(MatGetLocalSize(M, &i, NULL));
3615     PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3616     PetscCall(MatZeroEntries(M));
3617     /*
3618          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3619        rather than the slower MatSetValues().
3620     */
3621     M->was_assembled = PETSC_TRUE;
3622     M->assembled     = PETSC_FALSE;
3623   }
3624 
3625   /* (5) Set values of Msub to *newmat */
3626   PetscCall(PetscMalloc1(count, &colsub));
3627   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
3628 
3629   jj = aij->j;
3630   PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
3631   for (i = 0; i < m; i++) {
3632     row = rstart + i;
3633     nz  = ii[i + 1] - ii[i];
3634     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
3635     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
3636     jj += nz;
3637     aa += nz;
3638   }
3639   PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
3640   PetscCall(ISRestoreIndices(iscmap, &cmap));
3641 
3642   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3643   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3644 
3645   PetscCall(PetscFree(colsub));
3646 
3647   /* save Msub, iscol_sub and iscmap used in processor for next request */
3648   if (call == MAT_INITIAL_MATRIX) {
3649     *newmat = M;
3650     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub));
3651     PetscCall(MatDestroy(&Msub));
3652 
3653     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub));
3654     PetscCall(ISDestroy(&iscol_sub));
3655 
3656     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap));
3657     PetscCall(ISDestroy(&iscmap));
3658 
3659     if (iscol_local) {
3660       PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local));
3661       PetscCall(ISDestroy(&iscol_local));
3662     }
3663   }
3664   PetscFunctionReturn(0);
3665 }
3666 
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ
  locally, and then the end result by concatenating the local matrices.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

  This requires a sequential iscol with all indices.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) {
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* The decision must be collective: logical-AND across all ranks so every rank takes the same path */
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    /* Recover the sequential submatrix stashed on *newmat by a previous MAT_INITIAL_MATRIX call (see bottom of this function) */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* Prefix sum of the local column counts yields this rank's diagonal-column range [rstart, rend) */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    /* only the last rank sees the full sum, so only it can validate the total */
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens; only dlens is freed below */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        /* columns outside [rstart, rend) belong to the off-diagonal block */
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  /* Copy the sequential submatrix row by row into the parallel matrix; jj/aa walk the CSR arrays in lockstep */
  for (i = 0; i < m; i++) {
    row   = rstart + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj += nz;
    vwork = aa;
    aa += nz;
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(0);
}
3799 
3800 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) {
3801   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3802   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
3803   const PetscInt *JJ;
3804   PetscBool       nooffprocentries;
3805   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3806 
3807   PetscFunctionBegin;
3808   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);
3809 
3810   PetscCall(PetscLayoutSetUp(B->rmap));
3811   PetscCall(PetscLayoutSetUp(B->cmap));
3812   m      = B->rmap->n;
3813   cstart = B->cmap->rstart;
3814   cend   = B->cmap->rend;
3815   rstart = B->rmap->rstart;
3816 
3817   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3818 
3819   if (PetscDefined(USE_DEBUG)) {
3820     for (i = 0; i < m; i++) {
3821       nnz = Ii[i + 1] - Ii[i];
3822       JJ  = J + Ii[i];
3823       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3824       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3825       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3826     }
3827   }
3828 
3829   for (i = 0; i < m; i++) {
3830     nnz     = Ii[i + 1] - Ii[i];
3831     JJ      = J + Ii[i];
3832     nnz_max = PetscMax(nnz_max, nnz);
3833     d       = 0;
3834     for (j = 0; j < nnz; j++) {
3835       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3836     }
3837     d_nnz[i] = d;
3838     o_nnz[i] = nnz - d;
3839   }
3840   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3841   PetscCall(PetscFree2(d_nnz, o_nnz));
3842 
3843   for (i = 0; i < m; i++) {
3844     ii = i + rstart;
3845     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J + Ii[i], v ? v + Ii[i] : NULL, INSERT_VALUES));
3846   }
3847   nooffprocentries    = B->nooffprocentries;
3848   B->nooffprocentries = PETSC_TRUE;
3849   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3850   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3851   B->nooffprocentries = nooffprocentries;
3852 
3853   /* count number of entries below block diagonal */
3854   PetscCall(PetscFree(Aij->ld));
3855   PetscCall(PetscCalloc1(m, &ld));
3856   Aij->ld = ld;
3857   for (i = 0; i < m; i++) {
3858     nnz = Ii[i + 1] - Ii[i];
3859     j   = 0;
3860     while (j < nnz && J[j] < cstart) j++;
3861     ld[i] = j;
3862     J += nnz;
3863   }
3864 
3865   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3866   PetscFunctionReturn(0);
3867 }
3868 
3869 /*@
3870    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3871    (the default parallel PETSc format).
3872 
3873    Collective
3874 
3875    Input Parameters:
3876 +  B - the matrix
3877 .  i - the indices into j for the start of each local row (starts with zero)
3878 .  j - the column indices for each local row (starts with zero)
3879 -  v - optional values in the matrix
3880 
3881    Level: developer
3882 
3883    Notes:
3884        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3885      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3886      called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3887 
3888        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3889 
3890        The format which is used for the sparse matrix input, is equivalent to a
3891     row-major ordering.. i.e for the following matrix, the input data expected is
3892     as shown
3893 
3894 $        1 0 0
3895 $        2 0 3     P0
3896 $       -------
3897 $        4 5 6     P1
3898 $
3899 $     Process0 [P0]: rows_owned=[0,1]
3900 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3901 $        j =  {0,0,2}  [size = 3]
3902 $        v =  {1,2,3}  [size = 3]
3903 $
3904 $     Process1 [P1]: rows_owned=[2]
3905 $        i =  {0,3}    [size = nrow+1  = 1+1]
3906 $        j =  {0,1,2}  [size = 3]
3907 $        v =  {4,5,6}  [size = 3]
3908 
3909 .seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
3910           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
3911 @*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) {
  PetscFunctionBegin;
  /* Dispatch to the type-specific implementation if one is registered (e.g. MatMPIAIJSetPreallocationCSR_MPIAIJ);
     PetscTryMethod() is a no-op for matrix types that do not provide it */
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(0);
}
3917 
3918 /*@C
3919    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
3920    (the default parallel PETSc format).  For good matrix assembly performance
3921    the user should preallocate the matrix storage by setting the parameters
3922    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3923    performance can be increased by more than a factor of 50.
3924 
3925    Collective
3926 
3927    Input Parameters:
3928 +  B - the matrix
3929 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3930            (same value is used for all local rows)
3931 .  d_nnz - array containing the number of nonzeros in the various rows of the
3932            DIAGONAL portion of the local submatrix (possibly different for each row)
3933            or NULL (`PETSC_NULL_INTEGER` in Fortran), if d_nz is used to specify the nonzero structure.
3934            The size of this array is equal to the number of local rows, i.e 'm'.
3935            For matrices that will be factored, you must leave room for (and set)
3936            the diagonal entry even if it is zero.
3937 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3938            submatrix (same value is used for all local rows).
3939 -  o_nnz - array containing the number of nonzeros in the various rows of the
3940            OFF-DIAGONAL portion of the local submatrix (possibly different for
3941            each row) or NULL (`PETSC_NULL_INTEGER` in Fortran), if o_nz is used to specify the nonzero
3942            structure. The size of this array is equal to the number
3943            of local rows, i.e 'm'.
3944 
3945    If the *_nnz parameter is given then the *_nz parameter is ignored
3946 
   The `MATAIJ` format, also called compressed row storage (CSR), is fully compatible with standard Fortran 77
3948    storage.  The stored row and column indices begin with zero.
3949    See Users-Manual: ch_mat for details.
3950 
3951    The parallel matrix is partitioned such that the first m0 rows belong to
3952    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3953    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3954 
3955    The DIAGONAL portion of the local submatrix of a processor can be defined
3956    as the submatrix which is obtained by extraction the part corresponding to
3957    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3958    first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
3960    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3961    common case of a square matrix, the row and column ranges are the same and
3962    the DIAGONAL part is also square. The remaining portion of the local
3963    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3964 
3965    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3966 
3967    You can call MatGetInfo() to get information on how effective the preallocation was;
3968    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3969    You can also run with the option -info and look for messages with the string
3970    malloc in them to see if additional memory allocation was needed.
3971 
3972    Example usage:
3973 
3974    Consider the following 8x8 matrix with 34 non-zero values, that is
3975    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3976    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3977    as follows:
3978 
3979 .vb
3980             1  2  0  |  0  3  0  |  0  4
3981     Proc0   0  5  6  |  7  0  0  |  8  0
3982             9  0 10  | 11  0  0  | 12  0
3983     -------------------------------------
3984            13  0 14  | 15 16 17  |  0  0
3985     Proc1   0 18  0  | 19 20 21  |  0  0
3986             0  0  0  | 22 23  0  | 24  0
3987     -------------------------------------
3988     Proc2  25 26 27  |  0  0 28  | 29  0
3989            30  0  0  | 31 32 33  |  0 34
3990 .ve
3991 
3992    This can be represented as a collection of submatrices as:
3993 
3994 .vb
3995       A B C
3996       D E F
3997       G H I
3998 .ve
3999 
4000    Where the submatrices A,B,C are owned by proc0, D,E,F are
4001    owned by proc1, G,H,I are owned by proc2.
4002 
4003    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4004    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4005    The 'M','N' parameters are 8,8, and have the same values on all procs.
4006 
4007    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4008    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4009    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4010    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4011    part as `MATSEQAIJ` matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another `MATSEQAIJ` matrix.
4013 
4014    When d_nz, o_nz parameters are specified, d_nz storage elements are
4015    allocated for every row of the local diagonal submatrix, and o_nz
4016    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4017    One way to choose d_nz and o_nz is to use the max nonzerors per local
4018    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4019    In this case, the values of d_nz,o_nz are:
4020 .vb
4021      proc0 : dnz = 2, o_nz = 2
4022      proc1 : dnz = 3, o_nz = 2
4023      proc2 : dnz = 1, o_nz = 4
4024 .ve
4025    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4026    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
4028    34 values.
4029 
4030    When d_nnz, o_nnz parameters are specified, the storage is specified
4031    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4032    In the above case the values for d_nnz,o_nnz are:
4033 .vb
4034      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4035      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4036      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4037 .ve
4038    Here the space allocated is sum of all the above values i.e 34, and
4039    hence pre-allocation is perfect.
4040 
4041    Level: intermediate
4042 
4043 .seealso: `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4044           `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
4045 @*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) {
  PetscFunctionBegin;
  /* Validate the matrix header/type, then dispatch to the type-specific implementation if registered;
     PetscTryMethod() is a no-op for matrix types without a "MatMPIAIJSetPreallocation_C" method */
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(0);
}
4053 
4054 /*@
     MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain, in standard
         CSR format, the local rows.
4057 
4058    Collective
4059 
4060    Input Parameters:
4061 +  comm - MPI communicator
4062 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
4063 .  n - This value should be the same as the local size used in creating the
4064        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4065        calculated if N is given) For square matrices n is almost always m.
4066 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4067 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4068 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4069 .   j - column indices
4070 -   a - optional matrix values
4071 
4072    Output Parameter:
4073 .   mat - the matrix
4074 
4075    Level: intermediate
4076 
4077    Notes:
4078        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4079      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4080      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4081 
4082        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4083 
4084        The format which is used for the sparse matrix input, is equivalent to a
4085     row-major ordering.. i.e for the following matrix, the input data expected is
4086     as shown
4087 
4088        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
4089 
4090 $        1 0 0
4091 $        2 0 3     P0
4092 $       -------
4093 $        4 5 6     P1
4094 $
4095 $     Process0 [P0]: rows_owned=[0,1]
4096 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4097 $        j =  {0,0,2}  [size = 3]
4098 $        v =  {1,2,3}  [size = 3]
4099 $
4100 $     Process1 [P1]: rows_owned=[2]
4101 $        i =  {0,3}    [size = nrow+1  = 1+1]
4102 $        j =  {0,1,2}  [size = 3]
4103 $        v =  {4,5,6}  [size = 3]
4104 
.seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4106           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4107 @*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) {
  PetscFunctionBegin;
  /* Row-offset array must start at zero; a NULL i is tolerated here (checked before dereferencing) */
  PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  /* Create an MPIAIJ matrix and hand the CSR arrays to the preallocation/fill routine, which copies them */
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
  PetscFunctionReturn(0);
}
4119 
4120 /*@
     MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain, in standard
         CSR format, the local rows. Only the numerical values are updated; the other arrays must be identical to what was passed from `MatCreateMPIAIJWithArrays()`
4123 
4124      Deprecated: Use `MatUpdateMPIAIJWithArray()`
4125 
4126    Collective
4127 
4128    Input Parameters:
4129 +  mat - the matrix
4130 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
4131 .  n - This value should be the same as the local size used in creating the
4132        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4133        calculated if N is given) For square matrices n is almost always m.
4134 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4135 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4136 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4137 .  J - column indices
4138 -  v - matrix values
4139 
4140    Level: intermediate
4141 
4142 .seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4143           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()`
4144 @*/
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) {
  PetscInt        nnz, i;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
  PetscScalar    *ad, *ao;
  PetscInt        ldi, Iii, md;
  const PetscInt *Adi = Ad->i; /* row offsets of the diagonal block */
  PetscInt       *ld  = Aij->ld; /* per-row count of entries left of the diagonal block */

  PetscFunctionBegin;
  /* Only the numerical values are refreshed; the sparsity pattern (and hence J, M, N) must match the
     original call, so J/M/N are accepted for interface compatibility but not read here */
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
  PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));

  /* Each CSR row of v is laid out as [ldi off-diag entries | md diagonal-block entries | remaining
     off-diag entries]; split it into the diagonal (ad) and off-diagonal (ao) value arrays.
     NOTE(review): assumes Aij->ld was populated (e.g. by MatMPIAIJSetPreallocationCSR) — confirm. */
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];
    Iii = Ii[i];
    ldi = ld[i];
    md  = Adi[i + 1] - Adi[i];
    PetscCall(PetscArraycpy(ao, v + Iii, ldi));
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
    ad += md;
    ao += nnz - md;
  }
  /* Values were written in place, so assembly needs no off-process communication */
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  /* bump object states so cached data (norms, GPU copies, ...) is invalidated */
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(0);
}
4187 
4188 /*@
4189      MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values
4190 
4191    Collective
4192 
4193    Input Parameters:
4194 +  mat - the matrix
4195 -  v - matrix values, stored by row
4196 
4197    Level: intermediate
4198 
4199    Note:
4200    The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4201 
4202 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4204 @*/
/* Overwrite the numerical values of an assembled MATMPIAIJ matrix with the values in v,
   which stores every locally owned row contiguously in global-column (CSR) order; the
   nonzero pattern must be unchanged (see the manual page above). */
PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) {
  PetscInt        nnz, i, m;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data; /* local diagonal block */
  Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data; /* local off-diagonal block */
  PetscScalar    *ad, *ao;
  const PetscInt *Adi = Ad->i, *Adj = Ao->i; /* NOTE(review): despite its name, Adj is the row-offset (i) array of the OFF-diagonal block */
  PetscInt        ldi, Iii, md;
  PetscInt       *ld = Aij->ld; /* ld[i]: number of off-diagonal entries of row i preceding the diagonal block, per the copies below */

  PetscFunctionBegin;
  m = mat->rmap->n; /* number of locally owned rows */

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
  Iii = 0; /* running offset of row i within v */
  for (i = 0; i < m; i++) {
    nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; /* total nonzeros of row i (diag + off-diag) */
    ldi = ld[i];
    md  = Adi[i + 1] - Adi[i]; /* nonzeros of row i in the diagonal block */
    /* row i of v is laid out as: [ldi off-diag values | md diag values | nnz-ldi-md off-diag values] */
    PetscCall(PetscArraycpy(ao, v + Iii, ldi));
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
    ad += md;
    ao += nnz - md;
    Iii += nnz;
  }
  /* only locally owned entries were written, so assembly may skip the off-process phase */
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  /* NOTE(review): ad/ao have been advanced past the arrays at this point; this assumes the
     Restore routines ignore the pointer's value — confirm for device (GPU) backends */
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(0);
}
4245 
4246 /*@C
4247    MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4248    (the default parallel PETSc format).  For good matrix assembly performance
4249    the user should preallocate the matrix storage by setting the parameters
4250    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4251    performance can be increased by more than a factor of 50.
4252 
4253    Collective
4254 
4255    Input Parameters:
4256 +  comm - MPI communicator
4257 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4258            This value should be the same as the local size used in creating the
4259            y vector for the matrix-vector product y = Ax.
4260 .  n - This value should be the same as the local size used in creating the
4261        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4262        calculated if N is given) For square matrices n is almost always m.
4263 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4264 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4265 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4266            (same value is used for all local rows)
4267 .  d_nnz - array containing the number of nonzeros in the various rows of the
4268            DIAGONAL portion of the local submatrix (possibly different for each row)
4269            or NULL, if d_nz is used to specify the nonzero structure.
4270            The size of this array is equal to the number of local rows, i.e 'm'.
4271 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4272            submatrix (same value is used for all local rows).
4273 -  o_nnz - array containing the number of nonzeros in the various rows of the
4274            OFF-DIAGONAL portion of the local submatrix (possibly different for
4275            each row) or NULL, if o_nz is used to specify the nonzero
4276            structure. The size of this array is equal to the number
4277            of local rows, i.e 'm'.
4278 
4279    Output Parameter:
4280 .  A - the matrix
4281 
4282    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4283    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4284    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4285 
4286    Notes:
4287    If the *_nnz parameter is given then the *_nz parameter is ignored
4288 
4289    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4290    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4291    storage requirements for this matrix.
4292 
4293    If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one
   processor then it must be used on all processors that share the object for
4295    that argument.
4296 
4297    The user MUST specify either the local or global matrix dimensions
4298    (possibly both).
4299 
4300    The parallel matrix is partitioned across processors such that the
4301    first m0 rows belong to process 0, the next m1 rows belong to
4302    process 1, the next m2 rows belong to process 2 etc.. where
4303    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4304    values corresponding to [m x N] submatrix.
4305 
4306    The columns are logically partitioned with the n0 columns belonging
4307    to 0th partition, the next n1 columns belonging to the next
4308    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4309 
4310    The DIAGONAL portion of the local submatrix on any given processor
4311    is the submatrix corresponding to the rows and columns m,n
4312    corresponding to the given processor. i.e diagonal matrix on
4313    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4314    etc. The remaining portion of the local submatrix [m x (N-n)]
4315    constitute the OFF-DIAGONAL portion. The example below better
4316    illustrates this concept.
4317 
4318    For a square global matrix we define each processor's diagonal portion
4319    to be its local rows and the corresponding columns (a square submatrix);
4320    each processor's off-diagonal portion encompasses the remainder of the
4321    local matrix (a rectangular submatrix).
4322 
4323    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4324 
4325    When calling this routine with a single process communicator, a matrix of
4326    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4327    type of communicator, use the construction mechanism
4328 .vb
4329      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4330 .ve
4331 
4332 $     MatCreate(...,&A);
4333 $     MatSetType(A,MATMPIAIJ);
4334 $     MatSetSizes(A, m,n,M,N);
4335 $     MatMPIAIJSetPreallocation(A,...);
4336 
4337    By default, this format uses inodes (identical nodes) when possible.
4338    We search for consecutive rows with the same nonzero structure, thereby
4339    reusing matrix information to achieve increased efficiency.
4340 
4341    Options Database Keys:
4342 +  -mat_no_inode  - Do not use inodes
4343 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4344 -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4345         See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
4346         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4347 
4348    Example usage:
4349 
4350    Consider the following 8x8 matrix with 34 non-zero values, that is
4351    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4352    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4353    as follows
4354 
4355 .vb
4356             1  2  0  |  0  3  0  |  0  4
4357     Proc0   0  5  6  |  7  0  0  |  8  0
4358             9  0 10  | 11  0  0  | 12  0
4359     -------------------------------------
4360            13  0 14  | 15 16 17  |  0  0
4361     Proc1   0 18  0  | 19 20 21  |  0  0
4362             0  0  0  | 22 23  0  | 24  0
4363     -------------------------------------
4364     Proc2  25 26 27  |  0  0 28  | 29  0
4365            30  0  0  | 31 32 33  |  0 34
4366 .ve
4367 
4368    This can be represented as a collection of submatrices as
4369 
4370 .vb
4371       A B C
4372       D E F
4373       G H I
4374 .ve
4375 
4376    Where the submatrices A,B,C are owned by proc0, D,E,F are
4377    owned by proc1, G,H,I are owned by proc2.
4378 
4379    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4380    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4381    The 'M','N' parameters are 8,8, and have the same values on all procs.
4382 
4383    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4384    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4385    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4386    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4389 
4390    When d_nz, o_nz parameters are specified, d_nz storage elements are
4391    allocated for every row of the local diagonal submatrix, and o_nz
4392    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
4394    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4395    In this case, the values of d_nz,o_nz are
4396 .vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
4400 .ve
4401    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4402    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e. we are using 12+15+10=37 storage locations to store
4404    34 values.
4405 
4406    When d_nnz, o_nnz parameters are specified, the storage is specified
4407    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4408    In the above case the values for d_nnz,o_nnz are
4409 .vb
4410      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4411      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4412      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4413 .ve
4414    Here the space allocated is sum of all the above values i.e 34, and
4415    hence pre-allocation is perfect.
4416 
4417    Level: intermediate
4418 
4419 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4420           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
4421 @*/
4422 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) {
4423   PetscMPIInt size;
4424 
4425   PetscFunctionBegin;
4426   PetscCall(MatCreate(comm, A));
4427   PetscCall(MatSetSizes(*A, m, n, M, N));
4428   PetscCallMPI(MPI_Comm_size(comm, &size));
4429   if (size > 1) {
4430     PetscCall(MatSetType(*A, MATMPIAIJ));
4431     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4432   } else {
4433     PetscCall(MatSetType(*A, MATSEQAIJ));
4434     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4435   }
4436   PetscFunctionReturn(0);
4437 }
4438 
4439 /*@C
4440   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4441 
4442   Not collective
4443 
4444   Input Parameter:
4445 . A - The `MATMPIAIJ` matrix
4446 
4447   Output Parameters:
4448 + Ad - The local diagonal block as a `MATSEQAIJ` matrix
4449 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix
4450 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4451 
4452   Note:
4453   The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4454   in Ad are in [0, Nc) where Nc is the number of local columns. The columns are Ao are in [0, Nco), where Nco is
4455   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4456   local column numbers to global column numbers in the original matrix.
4457 
4458   Level: intermediate
4459 
4460 .seealso: `MATMPIAIJ`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ`
4461 @*/
4462 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) {
4463   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4464   PetscBool   flg;
4465 
4466   PetscFunctionBegin;
4467   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4468   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4469   if (Ad) *Ad = a->A;
4470   if (Ao) *Ao = a->B;
4471   if (colmap) *colmap = a->garray;
4472   PetscFunctionReturn(0);
4473 }
4474 
/* Stack the per-process sequential matrices inmat (one per rank, m x N each) into one
   parallel matrix *outmat on comm.  With MAT_INITIAL_MATRIX the layout and preallocation
   are computed first (symbolic phase); with MAT_REUSE_MATRIX the existing *outmat is
   simply refilled.  n is the local column count (or PETSC_DECIDE).
   All collective calls below (PetscSplitOwnership, MPIU_Allreduce, MPI_Scan, matrix
   creation/assembly) must execute on every rank in this order. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) {
  PetscInt     m, N, i, rstart, nnz, Ii;
  PetscInt    *indx;
  PetscScalar *values;
  MatType      rootType;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz, *onz, sum, bs, cbs;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    /* exclusive prefix sum of local row counts gives this rank's first global row */
    PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
    rstart -= m;

    /* count diagonal/off-diagonal nonzeros per row for preallocation */
    MatPreallocateBegin(comm, m, n, dnz, onz);
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
      PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
    }

    PetscCall(MatCreate(comm, outmat));
    PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
    PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
    PetscCall(MatGetRootType_Private(inmat, &rootType));
    PetscCall(MatSetType(*outmat, rootType));
    /* both preallocation calls are made; the one not matching the actual type of *outmat
       is ignored (standard PETSc behavior) */
    PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
    MatPreallocateEnd(dnz, onz);
    /* every rank inserts only its own rows (see numeric phase), so assembly can skip
       off-process communication */
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase */
  PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
    Ii = i + rstart; /* global row index of local row i */
    PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
  }
  PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
4525 
4526 PetscErrorCode MatFileSplit(Mat A, char *outfile) {
4527   PetscMPIInt        rank;
4528   PetscInt           m, N, i, rstart, nnz;
4529   size_t             len;
4530   const PetscInt    *indx;
4531   PetscViewer        out;
4532   char              *name;
4533   Mat                B;
4534   const PetscScalar *values;
4535 
4536   PetscFunctionBegin;
4537   PetscCall(MatGetLocalSize(A, &m, NULL));
4538   PetscCall(MatGetSize(A, NULL, &N));
4539   /* Should this be the type of the diagonal block of A? */
4540   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
4541   PetscCall(MatSetSizes(B, m, N, m, N));
4542   PetscCall(MatSetBlockSizesFromMats(B, A, A));
4543   PetscCall(MatSetType(B, MATSEQAIJ));
4544   PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
4545   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
4546   for (i = 0; i < m; i++) {
4547     PetscCall(MatGetRow(A, i + rstart, &nnz, &indx, &values));
4548     PetscCall(MatSetValues(B, 1, &i, nnz, indx, values, INSERT_VALUES));
4549     PetscCall(MatRestoreRow(A, i + rstart, &nnz, &indx, &values));
4550   }
4551   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
4552   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
4553 
4554   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
4555   PetscCall(PetscStrlen(outfile, &len));
4556   PetscCall(PetscMalloc1(len + 6, &name));
4557   PetscCall(PetscSNPrintf(name, len + 6, "%s.%d", outfile, rank));
4558   PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, name, FILE_MODE_APPEND, &out));
4559   PetscCall(PetscFree(name));
4560   PetscCall(MatView(B, out));
4561   PetscCall(PetscViewerDestroy(&out));
4562   PetscCall(MatDestroy(&B));
4563   PetscFunctionReturn(0);
4564 }
4565 
/* Destructor for the Mat_Merge_SeqsToMPI support structure attached (via a
   PetscContainer) to matrices built by MatCreateMPIAIJSumSeqAIJSymbolic();
   frees the merged CSR pattern and all communication buffers. */
static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) {
  Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;

  PetscFunctionBegin;
  if (!merge) PetscFunctionReturn(0);
  PetscCall(PetscFree(merge->id_r));
  PetscCall(PetscFree(merge->len_s));
  PetscCall(PetscFree(merge->len_r));
  PetscCall(PetscFree(merge->bi));
  PetscCall(PetscFree(merge->bj));
  /* buf_ri/buf_rj are pointer arrays whose entries point into one contiguous
     allocation owned by entry [0] — presumably allocated by PetscPostIrecvInt();
     TODO confirm */
  PetscCall(PetscFree(merge->buf_ri[0]));
  PetscCall(PetscFree(merge->buf_ri));
  PetscCall(PetscFree(merge->buf_rj[0]));
  PetscCall(PetscFree(merge->buf_rj));
  PetscCall(PetscFree(merge->coi));
  PetscCall(PetscFree(merge->coj));
  PetscCall(PetscFree(merge->owners_co));
  PetscCall(PetscLayoutDestroy(&merge->rowmap));
  PetscCall(PetscFree(merge));
  PetscFunctionReturn(0);
}
4587 
4588 #include <../src/mat/utils/freespace.h>
4589 #include <petscbt.h>
4590 
/* Numeric phase of summing per-process sequential AIJ matrices into the parallel matrix
   mpimat previously built by MatCreateMPIAIJSumSeqAIJSymbolic(): each process sends the
   values of the rows it does not own to the owning process, then accumulates its local
   and received values row by row and inserts them into mpimat. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) {
  MPI_Comm             comm;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, taga, *len_s;
  PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj;
  PetscInt             proc, m;
  PetscInt           **buf_ri, **buf_rj;
  PetscInt             k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
  PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
  MPI_Request         *s_waits, *r_waits;
  MPI_Status          *status;
  const MatScalar     *aa, *a_a;
  MatScalar          **abuf_r, *ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));

  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* retrieve the communication pattern and merged symbolic data computed by the symbolic phase */
  PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container, (void **)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
  aa = a_a;

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  PetscCall(PetscMalloc1(size, &status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
  PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));

  PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    i = owners[proc]; /* first global row owned by [proc]; its values start at aa + ai[i] */
    PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  /*----------------------------*/
  PetscCall(PetscMalloc1(N, &ba_i)); /* work array holding the values of one merged row */
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i = 0; i < m; i++) {
    arow = owners[rank] + i;
    bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
    bnzi = bi[i + 1] - bi[i];
    PetscCall(PetscArrayzero(ba_i, bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow + 1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* single forward scan: bj_i is a sorted superset of aj (built by the symbolic phase) */
    for (j = 0; nextaj < anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k] + 1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j = 0; nextaj < anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++;
        nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
  PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
  PetscFunctionReturn(0);
}
4710 
/* Symbolic phase of summing per-process sequential AIJ matrices into one parallel matrix:
   determines row ownership, exchanges the i/j (CSR pattern) data of off-owner rows,
   merges local and received column lists per row with a linked list, preallocates the
   parallel matrix B_mpi, and attaches the communication pattern (Mat_Merge_SeqsToMPI)
   to it for reuse by MatCreateMPIAIJSumSeqAIJNumeric(). */
PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) {
  Mat                  B_mpi;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
  PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
  PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
  PetscInt             len, proc, *dnz, *onz, bs, cbs;
  PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
  PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
  MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList   free_space = NULL, current_space = NULL;
  PetscBT              lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));

  /* make sure it is a PETSc comm */
  PetscCall(PetscCommDuplicate(comm, &comm, NULL));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  PetscCall(PetscNew(&merge));
  PetscCall(PetscMalloc1(size, &status));

  /* determine row ownership */
  /*---------------------------------------------------------*/
  PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
  PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
  PetscCall(PetscLayoutSetSize(merge->rowmap, M));
  PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
  PetscCall(PetscLayoutSetUp(merge->rowmap));
  PetscCall(PetscMalloc1(size, &len_si));
  PetscCall(PetscMalloc1(size, &merge->len_s));

  m      = merge->rowmap->n;     /* local row count after layout setup */
  owners = merge->rowmap->range; /* owners[p]..owners[p+1]-1 are the global rows of rank p */

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc = 0; proc < size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      len_s[proc] = 0; /* nothing is sent to self */
    } else {
      len_si[proc] = owners[proc + 1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      /* only rows with at least one nonzero are sent; len_si[proc] = 2*(nrows+1) holds
         the row indices and the i-structure offsets (see the message layout below) */
      nrows = 0;
      for (i = owners[proc]; i < owners[proc + 1]; i++) {
        if (ai[i + 1] > ai[i]) nrows++;
      }
      len_si[proc] = 2 * (nrows + 1);
      len += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
  PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  PetscCall(PetscCommGetNewTag(comm, &tagj));
  PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));

  /* post the Isend of j-structure */
  /*--------------------------------*/
  PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));

  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    i = owners[proc];
    PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));

  /* send and recv i-structure */
  /*---------------------------*/
  PetscCall(PetscCommGetNewTag(comm, &tagi));
  PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));

  PetscCall(PetscMalloc1(len + 1, &buf_s));
  buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc] / 2 - 1;
    buf_si_i    = buf_si + nrows + 1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i = owners[proc]; i < owners[proc + 1]; i++) {
      anzi = ai[i + 1] - ai[i];
      if (anzi) {
        buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
        nrows++;
      }
    }
    PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));

  PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
  for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));

  PetscCall(PetscFree(len_si));
  PetscCall(PetscFree(len_ri));
  PetscCall(PetscFree(rj_waits));
  PetscCall(PetscFree2(si_waits, sj_waits));
  PetscCall(PetscFree(ri_waits));
  PetscCall(PetscFree(buf_s));
  PetscCall(PetscFree(status));

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  PetscCall(PetscMalloc1(m + 1, &bi));
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N + 1;
  PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len = ai[owners[rank + 1]] - ai[owners[rank]];
  PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));

  current_space = free_space;

  /* determine symbolic info for each local row */
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  MatPreallocateBegin(comm, m, n, dnz, onz);
  len = 0; /* reused: becomes max row length, i.e. max(bnzi) */
  for (i = 0; i < m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow + 1] - ai[arow];
    aj   = a->j + ai[arow];
    PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) {            /* i-th row */
        anzi = *(nextai[k] + 1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
        bnzi += nlnk;
        nextrow[k]++;
        nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
    /* copy data into free space, then initialize lnk */
    PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
    PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));

    current_space->array += bnzi;
    current_space->local_used += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i + 1] = bi[i] + bnzi;
  }

  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));

  PetscCall(PetscMalloc1(bi[m] + 1, &bj));
  PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
  PetscCall(PetscLLDestroy(lnk, lnkbt));

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
  PetscCall(MatCreate(comm, &B_mpi));
  if (n == PETSC_DECIDE) {
    PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
  } else {
    PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
  }
  PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
  PetscCall(MatSetType(B_mpi, MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled = PETSC_FALSE;
  merge->bi        = bi;
  merge->bj        = bj;
  merge->buf_ri    = buf_ri;
  merge->buf_rj    = buf_rj;
  merge->coi       = NULL;
  merge->coj       = NULL;
  merge->owners_co = NULL;

  PetscCall(PetscCommDestroy(&comm));

  /* attach the supporting struct to B_mpi for reuse */
  PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
  PetscCall(PetscContainerSetPointer(container, merge));
  PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
  PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
  PetscCall(PetscContainerDestroy(&container));
  *mpimat = B_mpi;

  PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
  PetscFunctionReturn(0);
}
4955 
4956 /*@C
4957       MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
4958                  matrices from each processor
4959 
4960     Collective
4961 
4962    Input Parameters:
+    comm - the communicator the parallel matrix will live on
4964 .    seqmat - the input sequential matrices
4965 .    m - number of local rows (or `PETSC_DECIDE`)
4966 .    n - number of local columns (or `PETSC_DECIDE`)
4967 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
4968 
4969    Output Parameter:
4970 .    mpimat - the parallel matrix generated
4971 
4972     Level: advanced
4973 
4974    Note:
4975      The dimensions of the sequential matrix in each processor MUST be the same.
4976      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4977      destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat.
4978 @*/
4979 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) {
4980   PetscMPIInt size;
4981 
4982   PetscFunctionBegin;
4983   PetscCallMPI(MPI_Comm_size(comm, &size));
4984   if (size == 1) {
4985     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
4986     if (scall == MAT_INITIAL_MATRIX) {
4987       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
4988     } else {
4989       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
4990     }
4991     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
4992     PetscFunctionReturn(0);
4993   }
4994   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
4995   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
4996   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
4997   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
4998   PetscFunctionReturn(0);
4999 }
5000 
5001 /*@
5002      MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5003           mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained
5004           with `MatGetSize()`
5005 
5006     Not Collective
5007 
   Input Parameter:
.    A - the matrix
5011 
5012    Output Parameter:
5013 .    A_loc - the local sequential matrix generated
5014 
5015     Level: developer
5016 
5017    Notes:
5018      In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5019 
5020      Destroy the matrix with `MatDestroy()`
5021 
5022 .seealso: `MatMPIAIJGetLocalMat()`
5023 @*/
5024 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) {
5025   PetscBool mpi;
5026 
5027   PetscFunctionBegin;
5028   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5029   if (mpi) {
5030     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5031   } else {
5032     *A_loc = A;
5033     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5034   }
5035   PetscFunctionReturn(0);
5036 }
5037 
5038 /*@
5039      MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5040           mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained
5041           with `MatGetSize()`
5042 
5043     Not Collective
5044 
5045    Input Parameters:
5046 +    A - the matrix
5047 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5048 
5049    Output Parameter:
5050 .    A_loc - the local sequential matrix generated
5051 
5052     Level: developer
5053 
5054    Notes:
5055      In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5056 
5057      When the communicator associated with A has size 1 and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of A.
5058      If `MAT_REUSE_MATRIX` is requested with comm size 1, `MatCopy`(Adiag,*A_loc,`SAME_NONZERO_PATTERN`) is called.
5059      This means that one can preallocate the proper sequential matrix first and then call this routine with `MAT_REUSE_MATRIX` to safely
5060      modify the values of the returned A_loc.
5061 
5062 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5063 @*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) {
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray; /* cmap: local off-diagonal column -> global column */
  const PetscScalar *aa, *ba, *aav, *bav;
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  /* Prefix match (not exact compare) so types whose name extends "mpiaij" are accepted too */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* Single process: the diagonal block is already the whole local matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  a  = (Mat_SeqAIJ *)(mpimat->A)->data;
  b  = (Mat_SeqAIJ *)(mpimat->B)->data;
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  /* aav/bav keep the base pointers for the restore calls; aa/ba advance through the values */
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* Row i of the result holds all entries of row i of both the diagonal and off-diagonal blocks */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal portion of A with global column < cstart (precedes the diagonal block) */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A (local columns shifted by cstart to global indices) */
      for (j = 0; j < ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* remaining off-diagonal portion of A (global column past the diagonal block) */
      for (j = jo; j < ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* Nonzero pattern is assumed unchanged; copy only values, in the same interleaved order as above */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(0);
}
5167 
5168 /*@
5169      MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5170           mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part
5171 
5172     Not Collective
5173 
5174    Input Parameters:
5175 +    A - the matrix
5176 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5177 
5178    Output Parameters:
5179 +    glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5180 -    A_loc - the local sequential matrix generated
5181 
5182     Level: developer
5183 
5184    Note:
5185      This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the off diagonal part (in its local ordering)
5186 
5187 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5188 @*/
5189 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) {
5190   Mat             Ao, Ad;
5191   const PetscInt *cmap;
5192   PetscMPIInt     size;
5193   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5194 
5195   PetscFunctionBegin;
5196   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5197   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5198   if (size == 1) {
5199     if (scall == MAT_INITIAL_MATRIX) {
5200       PetscCall(PetscObjectReference((PetscObject)Ad));
5201       *A_loc = Ad;
5202     } else if (scall == MAT_REUSE_MATRIX) {
5203       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5204     }
5205     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5206     PetscFunctionReturn(0);
5207   }
5208   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5209   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5210   if (f) {
5211     PetscCall((*f)(A, scall, glob, A_loc));
5212   } else {
5213     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5214     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5215     Mat_SeqAIJ        *c;
5216     PetscInt          *ai = a->i, *aj = a->j;
5217     PetscInt          *bi = b->i, *bj = b->j;
5218     PetscInt          *ci, *cj;
5219     const PetscScalar *aa, *ba;
5220     PetscScalar       *ca;
5221     PetscInt           i, j, am, dn, on;
5222 
5223     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5224     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5225     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5226     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5227     if (scall == MAT_INITIAL_MATRIX) {
5228       PetscInt k;
5229       PetscCall(PetscMalloc1(1 + am, &ci));
5230       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5231       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5232       ci[0] = 0;
5233       for (i = 0, k = 0; i < am; i++) {
5234         const PetscInt ncols_o = bi[i + 1] - bi[i];
5235         const PetscInt ncols_d = ai[i + 1] - ai[i];
5236         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5237         /* diagonal portion of A */
5238         for (j = 0; j < ncols_d; j++, k++) {
5239           cj[k] = *aj++;
5240           ca[k] = *aa++;
5241         }
5242         /* off-diagonal portion of A */
5243         for (j = 0; j < ncols_o; j++, k++) {
5244           cj[k] = dn + *bj++;
5245           ca[k] = *ba++;
5246         }
5247       }
5248       /* put together the new matrix */
5249       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5250       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5251       /* Since these are PETSc arrays, change flags to free them as necessary. */
5252       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5253       c->free_a  = PETSC_TRUE;
5254       c->free_ij = PETSC_TRUE;
5255       c->nonew   = 0;
5256       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5257     } else if (scall == MAT_REUSE_MATRIX) {
5258       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5259       for (i = 0; i < am; i++) {
5260         const PetscInt ncols_d = ai[i + 1] - ai[i];
5261         const PetscInt ncols_o = bi[i + 1] - bi[i];
5262         /* diagonal portion of A */
5263         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5264         /* off-diagonal portion of A */
5265         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5266       }
5267       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5268     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5269     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5270     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa));
5271     if (glob) {
5272       PetscInt cst, *gidx;
5273 
5274       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5275       PetscCall(PetscMalloc1(dn + on, &gidx));
5276       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5277       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5278       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5279     }
5280   }
5281   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5282   PetscFunctionReturn(0);
5283 }
5284 
5285 /*@C
5286      MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5287 
5288     Not Collective
5289 
5290    Input Parameters:
5291 +    A - the matrix
5292 .    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5293 -    row, col - index sets of rows and columns to extract (or NULL)
5294 
5295    Output Parameter:
5296 .    A_loc - the local sequential matrix generated
5297 
5298     Level: developer
5299 
5300 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5301 @*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS          isrowa, iscola;
  Mat        *aloc;
  PetscBool   match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  if (!row) {
    /* default row set: all locally owned rows */
    start = A->rmap->rstart;
    end   = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* default column set: the owned (diagonal-block) columns merged, in ascending global
       order, with the nonzero off-diagonal columns from garray; the early break below
       relies on garray being sorted */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) { /* off-diagonal columns left of the owned range */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* the owned columns */
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* off-diagonal columns right of the owned range */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices() expects an array of matrices when reusing */
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  if (!row) PetscCall(ISDestroy(&isrowa));
  if (!col) PetscCall(ISDestroy(&iscola));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  PetscFunctionReturn(0);
}
5353 
5354 /*
 * Create a sequential AIJ matrix based on row indices. All columns of a row are extracted once the row is matched.
 * Rows could be local or remote. The routine is designed to be scalable in memory so that nothing is based
 * on a global size.
5358  * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) {
  Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth;
  PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt            owner;
  PetscSFNode           *iremote, *oiremote;
  const PetscInt        *lrowindices;
  PetscSF                sf, osf;
  PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar     *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  /* Each root row carries a pair (diag count, off-diag count); offsets are prefix sums */
  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i = 0; i < plocalsize; i++) {
    /* diag */
    nrcols[i * 2 + 0]         = pd->i[i + 1] - pd->i[i];
    /* off diag */
    nrcols[i * 2 + 1]         = po->i[i + 1] - po->i[i];
    /* compute prefix offsets so we know the relative location of each row's columns */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i = 0; i < nrows; i++) {
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol    = PetscMax(pnnz[i], ncol);
    /* diag */
    dntotalcols += nlcols[i * 2 + 0];
    /* off diag */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* Build a second pair of SFs whose leaves are the individual nonzero entries of P_oth */
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so ilocal needs to point into a single contiguous array */
      ilocal[dntotalcols++]      = ntotalcols++;
    }
    /* off diag */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++]      = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* Off diag */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* We operate on the matrix internal data for saving memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for diag matrix (undone below once the broadcast completes) */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth to store global column indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  /* Restore po->j to local indices; every entry must map back (IS_GTOLM_DROP drops none here) */
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(0);
}
5526 
5527 /*
5528  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5529  * This supports MPIAIJ and MAIJ
5530  * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp;
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   *  and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys; keys are off-diagonal columns of A
       collapsed by dof (several columns share one key when dof > 1) */
    PetscCall(PetscHMapICreate(&hamp));
    PetscCall(PetscHMapIResize(hamp, a->B->cmap->n));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      key = a->garray[i] / dof;
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same key as the previous entry */
        mapping[i] = count - 1;
      }
    }
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
    PetscCall(PetscHMapIGetSize(hamp, &htsize));
    PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ", htsize, count);
    /* Collect the unique keys and sort them into the row index set used to extract rows of P */
    PetscCall(PetscCalloc1(htsize, &rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    PetscCall(PetscSortInt(htsize, rowindices));
    PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
    /* In case the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If the matrix was already created, we simply update values using the SF objects
     * that were attached to the matrix earlier.
     */
    const PetscScalar *pd_a, *po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
    PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
    /* Update values in place */
    PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
  PetscFunctionReturn(0);
}
5603 
5604 /*@C
5605   MatGetBrowsOfAcols - Returns `IS` that contain rows of B that equal to nonzero columns of local A
5606 
5607   Collective on A
5608 
5609   Input Parameters:
5610 + A - the first matrix in `MATMPIAIJ` format
5611 . B - the second matrix in `MATMPIAIJ` format
5612 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5613 
5614   Output Parameters:
5615 + rowb - On input index sets of rows of B to extract (or NULL), modified on output
5616 . colb - On input index sets of columns of B to extract (or NULL), modified on output
5617 - B_seq - the sequential matrix generated
5618 
5619   Level: developer
5620 
5621 @*/
PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
  IS          isrowb, iscolb;
  Mat        *bseq = NULL;

  PetscFunctionBegin;
  /* B's local row range must match A's local column range */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  }
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));

  if (scall == MAT_INITIAL_MATRIX) {
    /* Build the row set for B: A's owned columns merged, in ascending global order, with
       A's nonzero off-diagonal columns; the early break relies on garray being sorted */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) { /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); /* take all columns of B */
  } else {
    PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb;
    iscolb = *colb;
    /* MatCreateSubMatrices() expects an array of matrices when reusing */
    PetscCall(PetscMalloc1(1, &bseq));
    bseq[0] = *B_seq;
  }
  PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
  *B_seq = bseq[0];
  PetscCall(PetscFree(bseq));
  /* Hand the index sets back to the caller for reuse, or destroy the defaults we created */
  if (!rowb) {
    PetscCall(ISDestroy(&isrowb));
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    PetscCall(ISDestroy(&iscolb));
  } else {
    *colb = iscolb;
  }
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
  PetscFunctionReturn(0);
}
5673 
5674 /*
5675     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5676     of the OFF-DIAGONAL portion of local A
5677 
5678     Collective on Mat
5679 
5680    Input Parameters:
5681 +    A,B - the matrices in mpiaij format
5682 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5683 
5684    Output Parameter:
5685 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5686 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5687 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5688 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5689 
5690     Developer Note:
5691     This directly accesses information inside the VecScatter associated with the matrix-vector product
5692      for this matrix. This is not desirable..
5693 
5694     Level: developer
5695 
5696 */
5697 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) {
5698   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5699   Mat_SeqAIJ        *b_oth;
5700   VecScatter         ctx;
5701   MPI_Comm           comm;
5702   const PetscMPIInt *rprocs, *sprocs;
5703   const PetscInt    *srow, *rstarts, *sstarts;
5704   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5705   PetscInt           i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len;
5706   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5707   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5708   PetscMPIInt        size, tag, rank, nreqs;
5709 
5710   PetscFunctionBegin;
5711   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5712   PetscCallMPI(MPI_Comm_size(comm, &size));
5713 
5714   if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) {
5715     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5716   }
5717   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5718   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5719 
5720   if (size == 1) {
5721     startsj_s = NULL;
5722     bufa_ptr  = NULL;
5723     *B_oth    = NULL;
5724     PetscFunctionReturn(0);
5725   }
5726 
5727   ctx = a->Mvctx;
5728   tag = ((PetscObject)ctx)->tag;
5729 
5730   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5731   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5732   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5733   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5734   PetscCall(PetscMalloc1(nreqs, &reqs));
5735   rwaits = reqs;
5736   swaits = reqs + nrecvs;
5737 
5738   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5739   if (scall == MAT_INITIAL_MATRIX) {
5740     /* i-array */
5741     /*---------*/
5742     /*  post receives */
5743     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5744     for (i = 0; i < nrecvs; i++) {
5745       rowlen = rvalues + rstarts[i] * rbs;
5746       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5747       PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5748     }
5749 
5750     /* pack the outgoing message */
5751     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5752 
5753     sstartsj[0] = 0;
5754     rstartsj[0] = 0;
5755     len         = 0; /* total length of j or a array to be sent */
5756     if (nsends) {
5757       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5758       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5759     }
5760     for (i = 0; i < nsends; i++) {
5761       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5762       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5763       for (j = 0; j < nrows; j++) {
5764         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5765         for (l = 0; l < sbs; l++) {
5766           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5767 
5768           rowlen[j * sbs + l] = ncols;
5769 
5770           len += ncols;
5771           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5772         }
5773         k++;
5774       }
5775       PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5776 
5777       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5778     }
5779     /* recvs and sends of i-array are completed */
5780     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5781     PetscCall(PetscFree(svalues));
5782 
5783     /* allocate buffers for sending j and a arrays */
5784     PetscCall(PetscMalloc1(len + 1, &bufj));
5785     PetscCall(PetscMalloc1(len + 1, &bufa));
5786 
5787     /* create i-array of B_oth */
5788     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5789 
5790     b_othi[0] = 0;
5791     len       = 0; /* total length of j or a array to be received */
5792     k         = 0;
5793     for (i = 0; i < nrecvs; i++) {
5794       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5795       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5796       for (j = 0; j < nrows; j++) {
5797         b_othi[k + 1] = b_othi[k] + rowlen[j];
5798         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5799         k++;
5800       }
5801       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5802     }
5803     PetscCall(PetscFree(rvalues));
5804 
5805     /* allocate space for j and a arrays of B_oth */
5806     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5807     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5808 
5809     /* j-array */
5810     /*---------*/
5811     /*  post receives of j-array */
5812     for (i = 0; i < nrecvs; i++) {
5813       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5814       PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5815     }
5816 
5817     /* pack the outgoing message j-array */
5818     if (nsends) k = sstarts[0];
5819     for (i = 0; i < nsends; i++) {
5820       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5821       bufJ  = bufj + sstartsj[i];
5822       for (j = 0; j < nrows; j++) {
5823         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5824         for (ll = 0; ll < sbs; ll++) {
5825           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5826           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5827           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5828         }
5829       }
5830       PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5831     }
5832 
5833     /* recvs and sends of j-array are completed */
5834     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5835   } else if (scall == MAT_REUSE_MATRIX) {
5836     sstartsj = *startsj_s;
5837     rstartsj = *startsj_r;
5838     bufa     = *bufa_ptr;
5839     b_oth    = (Mat_SeqAIJ *)(*B_oth)->data;
5840     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5841   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5842 
5843   /* a-array */
5844   /*---------*/
5845   /*  post receives of a-array */
5846   for (i = 0; i < nrecvs; i++) {
5847     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5848     PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
5849   }
5850 
5851   /* pack the outgoing message a-array */
5852   if (nsends) k = sstarts[0];
5853   for (i = 0; i < nsends; i++) {
5854     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5855     bufA  = bufa + sstartsj[i];
5856     for (j = 0; j < nrows; j++) {
5857       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5858       for (ll = 0; ll < sbs; ll++) {
5859         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5860         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
5861         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5862       }
5863     }
5864     PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
5865   }
5866   /* recvs and sends of a-array are completed */
5867   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5868   PetscCall(PetscFree(reqs));
5869 
5870   if (scall == MAT_INITIAL_MATRIX) {
5871     /* put together the new matrix */
5872     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
5873 
5874     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5875     /* Since these are PETSc arrays, change flags to free them as necessary. */
5876     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5877     b_oth->free_a  = PETSC_TRUE;
5878     b_oth->free_ij = PETSC_TRUE;
5879     b_oth->nonew   = 0;
5880 
5881     PetscCall(PetscFree(bufj));
5882     if (!startsj_s || !bufa_ptr) {
5883       PetscCall(PetscFree2(sstartsj, rstartsj));
5884       PetscCall(PetscFree(bufa_ptr));
5885     } else {
5886       *startsj_s = sstartsj;
5887       *startsj_r = rstartsj;
5888       *bufa_ptr  = bufa;
5889     }
5890   } else if (scall == MAT_REUSE_MATRIX) {
5891     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
5892   }
5893 
5894   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
5895   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
5896   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
5897   PetscFunctionReturn(0);
5898 }
5899 
5900 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
5901 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
5902 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
5903 #if defined(PETSC_HAVE_MKL_SPARSE)
5904 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
5905 #endif
5906 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
5907 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
5908 #if defined(PETSC_HAVE_ELEMENTAL)
5909 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
5910 #endif
5911 #if defined(PETSC_HAVE_SCALAPACK)
5912 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
5913 #endif
5914 #if defined(PETSC_HAVE_HYPRE)
5915 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
5916 #endif
5917 #if defined(PETSC_HAVE_CUDA)
5918 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
5919 #endif
5920 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
5921 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
5922 #endif
5923 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
5924 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
5925 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5926 
5927 /*
5928     Computes (B'*A')' since computing B*A directly is untenable
5929 
5930                n                       p                          p
5931         [             ]       [             ]         [                 ]
5932       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5933         [             ]       [             ]         [                 ]
5934 
5935 */
5936 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) {
5937   Mat At, Bt, Ct;
5938 
5939   PetscFunctionBegin;
5940   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
5941   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
5942   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
5943   PetscCall(MatDestroy(&At));
5944   PetscCall(MatDestroy(&Bt));
5945   PetscCall(MatTransposeSetPrecursor(Ct, C));
5946   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
5947   PetscCall(MatDestroy(&Ct));
5948   PetscFunctionReturn(0);
5949 }
5950 
5951 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) {
5952   PetscBool cisdense;
5953 
5954   PetscFunctionBegin;
5955   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
5956   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
5957   PetscCall(MatSetBlockSizesFromMats(C, A, B));
5958   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
5959   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
5960   PetscCall(MatSetUp(C));
5961 
5962   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5963   PetscFunctionReturn(0);
5964 }
5965 
5966 /* ----------------------------------------------------------------*/
5967 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) {
5968   Mat_Product *product = C->product;
5969   Mat          A = product->A, B = product->B;
5970 
5971   PetscFunctionBegin;
5972   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
5973     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5974 
5975   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
5976   C->ops->productsymbolic = MatProductSymbolic_AB;
5977   PetscFunctionReturn(0);
5978 }
5979 
5980 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) {
5981   Mat_Product *product = C->product;
5982 
5983   PetscFunctionBegin;
5984   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
5985   PetscFunctionReturn(0);
5986 }
5987 
5988 /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
5989 
5990   Input Parameters:
5991 
5992     j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
5993     j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)
5994 
5995     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
5996 
5997     For Set1, j1[] contains column indices of the nonzeros.
5998     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6000     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6001 
6002     Similar for Set2.
6003 
6004     This routine merges the two sets of nonzeros row by row and removes repeats.
6005 
6006   Output Parameters: (memory is allocated by the caller)
6007 
6008     i[],j[]: the CSR of the merged matrix, which has m rows.
6009     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6010     imap2[]: similar to imap1[], but for Set2.
6011     Note we order nonzeros row-by-row and from left to right.
6012 */
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[]) {
  PetscInt   r, m; /* Row index of mat; number of local rows */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged set, respectively */
  i[0]        = 0; /* CSR row pointer of the merged matrix starts at zero */
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    /* Classic two-way merge of the two sorted (possibly repeated) column lists of row r */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump over the repeats of this unique nonzero in Set1 */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump over the repeats of this unique nonzero in Set2 */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) { /* Next merged nonzero comes from Set1 only */
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else { /* Next merged nonzero comes from Set2 only */
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    i[r + 1] = t; /* Row r of the merged matrix ends at position t */
  }
  PetscFunctionReturn(0);
}
6069 
6070 /* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6071 
6072   Input Parameters:
6073     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6074     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6075       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6076 
6077       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6078       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6079 
6080   Output Parameters:
6081     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6082     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6083       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6084       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6085 
6086     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6087       Atot: number of entries belonging to the diagonal block.
6088       Annz: number of unique nonzeros belonging to the diagonal block.
6089       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6090         repeats (i.e., same 'i,j' pair).
6091       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6092         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6093 
6094       Atot: number of entries belonging to the diagonal block
6095       Annz: number of unique nonzeros belonging to the diagonal block.
6096 
6097     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6098 
6099     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6100 */
static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) {
  PetscInt    cstart, cend, rstart, rend, row, col;
  PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  k, m, p, q, r, s, mid;
  PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  m = rend - rstart; /* number of local rows */

  for (k = 0; k < n; k++) {
    if (i[k] >= 0) break;
  } /* Skip negative rows */

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k < n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s = k; s < n; s++)
      if (i[s] != row) break;
    for (p = k; p < s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1]  */
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]); /* NOTE(review): valid global columns are 0..N-1, so this bound looks like it should be < mat->cmap->N -- confirm */
    }
    PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); /* Sort the row's columns, carrying perm[] along */
    PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row - rstart] = k;
    rowMid[row - rstart]   = mid;
    rowEnd[row - rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag/offdiag row */
    for (p = k; p < mid;) {
      col = j[p];
      do {
        j[p] += PETSC_MAX_INT; /* Revert the shifted diag column index back to its true value */
        p++;
      } while (p < mid && j[p] == col); /* Skip repeats of this column */
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      do { p++; } while (p < s && j[p] == col); /* Skip repeats of this offdiag column */
      Bnnz++;
    }
    k = s; /* Advance to the next row */
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot, &Aperm));
  PetscCall(PetscMalloc1(Btot, &Bperm));
  PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; /* Reuse the counters as running offsets */
  for (r = 0; r < m; r++) {
    k   = rowBegin[r];
    mid = rowMid[r];
    s   = rowEnd[r];
    PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k));
    PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p = k; p < mid;) {
      col = j[p];
      q   = p;
      do { p++; } while (p < mid && j[p] == col);
      Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); /* p - q = number of repeats of this unique diag nonzero */
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      q   = p;
      do { p++; } while (p < s && j[p] == col);
      Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(0);
}
6202 
6203 /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6204 
6205   Input Parameters:
6206     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6207     nnz:  number of unique nonzeros in the merged matrix
6208     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
    jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6210 
6211   Output Parameter: (memory is allocated by the caller)
6212     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6213 
6214   Example:
6215     nnz1 = 4
6216     nnz  = 6
6217     imap = [1,3,4,5]
6218     jmap = [0,3,5,6,7]
6219    then,
6220     jmap_new = [0,0,3,3,5,6,7]
6221 */
6222 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) {
6223   PetscCount k, p;
6224 
6225   PetscFunctionBegin;
6226   jmap_new[0] = 0;
6227   p           = nnz;                /* p loops over jmap_new[] backwards */
6228   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6229     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6230   }
6231   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6232   PetscFunctionReturn(0);
6233 }
6234 
6235 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) {
6236   MPI_Comm    comm;
6237   PetscMPIInt rank, size;
6238   PetscInt    m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6239   PetscCount  k, p, q, rem;                           /* Loop variables over coo arrays */
6240   Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data;
6241 
6242   PetscFunctionBegin;
6243   PetscCall(PetscFree(mpiaij->garray));
6244   PetscCall(VecDestroy(&mpiaij->lvec));
6245 #if defined(PETSC_USE_CTABLE)
6246   PetscCall(PetscTableDestroy(&mpiaij->colmap));
6247 #else
6248   PetscCall(PetscFree(mpiaij->colmap));
6249 #endif
6250   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6251   mat->assembled     = PETSC_FALSE;
6252   mat->was_assembled = PETSC_FALSE;
6253   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
6254 
6255   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6256   PetscCallMPI(MPI_Comm_size(comm, &size));
6257   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6258   PetscCall(PetscLayoutSetUp(mat->rmap));
6259   PetscCall(PetscLayoutSetUp(mat->cmap));
6260   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6261   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6262   PetscCall(MatGetLocalSize(mat, &m, &n));
6263   PetscCall(MatGetSize(mat, &M, &N));
6264 
6265   /* ---------------------------------------------------------------------------*/
6266   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6267   /* entries come first, then local rows, then remote rows.                     */
6268   /* ---------------------------------------------------------------------------*/
6269   PetscCount n1 = coo_n, *perm1;
6270   PetscInt  *i1 = coo_i, *j1 = coo_j;
6271 
6272   PetscCall(PetscMalloc1(n1, &perm1));
6273   for (k = 0; k < n1; k++) perm1[k] = k;
6274 
6275   /* Manipulate indices so that entries with negative row or col indices will have smallest
6276      row indices, local entries will have greater but negative row indices, and remote entries
6277      will have positive row indices.
6278   */
6279   for (k = 0; k < n1; k++) {
6280     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT;                /* e.g., -2^31, minimal to move them ahead */
6281     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6282     else {
6283       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6284       if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6285     }
6286   }
6287 
6288   /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */
6289   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6290   for (k = 0; k < n1; k++) {
6291     if (i1[k] > PETSC_MIN_INT) break;
6292   }                                                                               /* Advance k to the first entry we need to take care of */
6293   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */
6294   for (; k < rem; k++) i1[k] += PETSC_MAX_INT;                                    /* Revert row indices of local rows*/
6295 
6296   /* ---------------------------------------------------------------------------*/
6297   /*           Split local rows into diag/offdiag portions                      */
6298   /* ---------------------------------------------------------------------------*/
6299   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6300   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1, *Cperm1;
6301   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6302 
6303   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6304   PetscCall(PetscMalloc1(n1 - rem, &Cperm1));
6305   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6306 
6307   /* ---------------------------------------------------------------------------*/
6308   /*           Send remote rows to their owner                                  */
6309   /* ---------------------------------------------------------------------------*/
6310   /* Find which rows should be sent to which remote ranks*/
6311   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6312   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6313   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6314   const PetscInt *ranges;
6315   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6316 
6317   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6318   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6319   for (k = rem; k < n1;) {
6320     PetscMPIInt owner;
6321     PetscInt    firstRow, lastRow;
6322 
6323     /* Locate a row range */
6324     firstRow = i1[k]; /* first row of this owner */
6325     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6326     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6327 
6328     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6329     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6330 
6331     /* All entries in [k,p) belong to this remote owner */
6332     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6333       PetscMPIInt *sendto2;
6334       PetscInt    *nentries2;
6335       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6336 
6337       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6338       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6339       PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1));
6340       PetscCall(PetscFree2(sendto, nentries2));
6341       sendto   = sendto2;
6342       nentries = nentries2;
6343       maxNsend = maxNsend2;
6344     }
6345     sendto[nsend]   = owner;
6346     nentries[nsend] = p - k;
6347     PetscCall(PetscCountCast(p - k, &nentries[nsend]));
6348     nsend++;
6349     k = p;
6350   }
6351 
6352   /* Build 1st SF to know offsets on remote to send data */
6353   PetscSF      sf1;
6354   PetscInt     nroots = 1, nroots2 = 0;
6355   PetscInt     nleaves = nsend, nleaves2 = 0;
6356   PetscInt    *offsets;
6357   PetscSFNode *iremote;
6358 
6359   PetscCall(PetscSFCreate(comm, &sf1));
6360   PetscCall(PetscMalloc1(nsend, &iremote));
6361   PetscCall(PetscMalloc1(nsend, &offsets));
6362   for (k = 0; k < nsend; k++) {
6363     iremote[k].rank  = sendto[k];
6364     iremote[k].index = 0;
6365     nleaves2 += nentries[k];
6366     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6367   }
6368   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6369   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6370   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6371   PetscCall(PetscSFDestroy(&sf1));
6372   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem);
6373 
6374   /* Build 2nd SF to send remote COOs to their owner */
6375   PetscSF sf2;
6376   nroots  = nroots2;
6377   nleaves = nleaves2;
6378   PetscCall(PetscSFCreate(comm, &sf2));
6379   PetscCall(PetscSFSetFromOptions(sf2));
6380   PetscCall(PetscMalloc1(nleaves, &iremote));
6381   p = 0;
6382   for (k = 0; k < nsend; k++) {
6383     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6384     for (q = 0; q < nentries[k]; q++, p++) {
6385       iremote[p].rank  = sendto[k];
6386       iremote[p].index = offsets[k] + q;
6387     }
6388   }
6389   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6390 
6391   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6392   PetscCall(PetscArraycpy(Cperm1, perm1 + rem, n1 - rem));
6393 
6394   /* Send the remote COOs to their owner */
6395   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6396   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6397   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6398   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6399   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE));
6400   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6401   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE));
6402 
6403   PetscCall(PetscFree(offsets));
6404   PetscCall(PetscFree2(sendto, nentries));
6405 
6406   /* ---------------------------------------------------------------*/
6407   /* Sort received COOs by row along with the permutation array     */
6408   /* ---------------------------------------------------------------*/
6409   for (k = 0; k < n2; k++) perm2[k] = k;
6410   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6411 
6412   /* ---------------------------------------------------------------*/
6413   /* Split received COOs into diag/offdiag portions                 */
6414   /* ---------------------------------------------------------------*/
6415   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6416   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6417   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6418 
6419   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6420   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6421 
6422   /* --------------------------------------------------------------------------*/
6423   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6424   /* --------------------------------------------------------------------------*/
6425   PetscInt *Ai, *Bi;
6426   PetscInt *Aj, *Bj;
6427 
6428   PetscCall(PetscMalloc1(m + 1, &Ai));
6429   PetscCall(PetscMalloc1(m + 1, &Bi));
6430   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6431   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6432 
6433   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6434   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6435   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6436   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6437   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6438 
6439   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6440   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6441 
6442   /* --------------------------------------------------------------------------*/
6443   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6444   /* expect nonzeros in A/B most likely have local contributing entries        */
6445   /* --------------------------------------------------------------------------*/
6446   PetscInt    Annz = Ai[m];
6447   PetscInt    Bnnz = Bi[m];
6448   PetscCount *Ajmap1_new, *Bjmap1_new;
6449 
6450   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6451   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6452 
6453   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6454   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6455 
6456   PetscCall(PetscFree(Aimap1));
6457   PetscCall(PetscFree(Ajmap1));
6458   PetscCall(PetscFree(Bimap1));
6459   PetscCall(PetscFree(Bjmap1));
6460   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6461   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6462   PetscCall(PetscFree(perm1));
6463   PetscCall(PetscFree3(i2, j2, perm2));
6464 
6465   Ajmap1 = Ajmap1_new;
6466   Bjmap1 = Bjmap1_new;
6467 
6468   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6469   if (Annz < Annz1 + Annz2) {
6470     PetscInt *Aj_new;
6471     PetscCall(PetscMalloc1(Annz, &Aj_new));
6472     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6473     PetscCall(PetscFree(Aj));
6474     Aj = Aj_new;
6475   }
6476 
6477   if (Bnnz < Bnnz1 + Bnnz2) {
6478     PetscInt *Bj_new;
6479     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6480     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6481     PetscCall(PetscFree(Bj));
6482     Bj = Bj_new;
6483   }
6484 
6485   /* --------------------------------------------------------------------------------*/
6486   /* Create new submatrices for on-process and off-process coupling                  */
6487   /* --------------------------------------------------------------------------------*/
6488   PetscScalar *Aa, *Ba;
6489   MatType      rtype;
6490   Mat_SeqAIJ  *a, *b;
6491   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6492   PetscCall(PetscCalloc1(Bnnz, &Ba));
6493   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6494   if (cstart) {
6495     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6496   }
6497   PetscCall(MatDestroy(&mpiaij->A));
6498   PetscCall(MatDestroy(&mpiaij->B));
6499   PetscCall(MatGetRootType_Private(mat, &rtype));
6500   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6501   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6502   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6503 
6504   a               = (Mat_SeqAIJ *)mpiaij->A->data;
6505   b               = (Mat_SeqAIJ *)mpiaij->B->data;
6506   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6507   a->free_a = b->free_a = PETSC_TRUE;
6508   a->free_ij = b->free_ij = PETSC_TRUE;
6509 
6510   /* conversion must happen AFTER multiply setup */
6511   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6512   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6513   PetscCall(VecDestroy(&mpiaij->lvec));
6514   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6515   PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)mpiaij->lvec));
6516 
6517   mpiaij->coo_n   = coo_n;
6518   mpiaij->coo_sf  = sf2;
6519   mpiaij->sendlen = nleaves;
6520   mpiaij->recvlen = nroots;
6521 
6522   mpiaij->Annz = Annz;
6523   mpiaij->Bnnz = Bnnz;
6524 
6525   mpiaij->Annz2 = Annz2;
6526   mpiaij->Bnnz2 = Bnnz2;
6527 
6528   mpiaij->Atot1 = Atot1;
6529   mpiaij->Atot2 = Atot2;
6530   mpiaij->Btot1 = Btot1;
6531   mpiaij->Btot2 = Btot2;
6532 
6533   mpiaij->Ajmap1 = Ajmap1;
6534   mpiaij->Aperm1 = Aperm1;
6535 
6536   mpiaij->Bjmap1 = Bjmap1;
6537   mpiaij->Bperm1 = Bperm1;
6538 
6539   mpiaij->Aimap2 = Aimap2;
6540   mpiaij->Ajmap2 = Ajmap2;
6541   mpiaij->Aperm2 = Aperm2;
6542 
6543   mpiaij->Bimap2 = Bimap2;
6544   mpiaij->Bjmap2 = Bjmap2;
6545   mpiaij->Bperm2 = Bperm2;
6546 
6547   mpiaij->Cperm1 = Cperm1;
6548 
6549   /* Allocate in preallocation. If not used, it has zero cost on host */
6550   PetscCall(PetscMalloc2(mpiaij->sendlen, &mpiaij->sendbuf, mpiaij->recvlen, &mpiaij->recvbuf));
6551   PetscFunctionReturn(0);
6552 }
6553 
/* Insert/add the COO values v[] (ordered as the coo_i/coo_j arrays given to
   MatSetPreallocationCOO()) into the diagonal block A and off-diagonal block B.

   Locally owned entries are summed into A/B through the Ajmap1/Aperm1 and
   Bjmap1/Bperm1 maps built during preallocation; entries destined for remote
   rows are packed via Cperm1 and shipped to their owners through
   mpiaij->coo_sf, then folded in with the *2 maps.  imode selects between
   INSERT_VALUES and ADD_VALUES semantics. */
static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) {
  Mat_MPIAIJ       *mpiaij = (Mat_MPIAIJ *)mat->data;
  Mat               A = mpiaij->A, B = mpiaij->B;
  PetscCount        Annz = mpiaij->Annz, Annz2 = mpiaij->Annz2, Bnnz = mpiaij->Bnnz, Bnnz2 = mpiaij->Bnnz2;
  PetscScalar      *Aa, *Ba;
  PetscScalar      *sendbuf = mpiaij->sendbuf;
  PetscScalar      *recvbuf = mpiaij->recvbuf;
  const PetscCount *Ajmap1 = mpiaij->Ajmap1, *Ajmap2 = mpiaij->Ajmap2, *Aimap2 = mpiaij->Aimap2;
  const PetscCount *Bjmap1 = mpiaij->Bjmap1, *Bjmap2 = mpiaij->Bjmap2, *Bimap2 = mpiaij->Bimap2;
  const PetscCount *Aperm1 = mpiaij->Aperm1, *Aperm2 = mpiaij->Aperm2, *Bperm1 = mpiaij->Bperm1, *Bperm2 = mpiaij->Bperm2;
  const PetscCount *Cperm1 = mpiaij->Cperm1;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
  PetscCall(MatSeqAIJGetArray(B, &Ba));

  /* Pack entries to be sent to remote */
  for (PetscCount i = 0; i < mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];

  /* Send remote entries to their owner and overlap the communication with local computation */
  PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
  /* Add local entries to A and B; Ajmap1[i]..Ajmap1[i+1] delimits the COO entries contributing to nonzero i */
  for (PetscCount i = 0; i < Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
    PetscScalar sum = 0.0;                /* Do partial summation first to improve numerical stability */
    for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
    Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
  }
  for (PetscCount i = 0; i < Bnnz; i++) {
    PetscScalar sum = 0.0;
    for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
    Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
  }
  PetscCall(PetscSFReduceEnd(mpiaij->coo_sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));

  /* Add received remote entries to A and B; Aimap2[i]/Bimap2[i] is the position in Aa[]/Ba[] that
     the i-th received group updates.  Remote entries are always added: the local pass above has
     already initialized every nonzero according to imode. */
  for (PetscCount i = 0; i < Annz2; i++) {
    for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
  }
  for (PetscCount i = 0; i < Bnnz2; i++) {
    for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
  }
  PetscCall(MatSeqAIJRestoreArray(A, &Aa));
  PetscCall(MatSeqAIJRestoreArray(B, &Ba));
  PetscFunctionReturn(0);
}
6599 
6600 /* ----------------------------------------------------------------*/
6601 
6602 /*MC
6603    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6604 
6605    Options Database Keys:
6606 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6607 
6608    Level: beginner
6609 
6610    Notes:
6611     `MatSetValues()` may be called for this matrix type with a NULL argument for the numerical values,
6612     in this case the values associated with the rows and columns one passes in are set to zero
6613     in the matrix
6614 
    `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
6616     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
6617 
6618 .seealso: `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6619 M*/
6620 
/* Constructor for MATMPIAIJ: installs the function table, creates the stash used to
   collect off-process entries set with MatSetValues(), initializes default flags, and
   registers the type-specific operations dispatched through PetscObjectComposeFunction().
   Optional conversions/backends (CUDA, Kokkos, MKL, Elemental, ScaLAPACK, hypre) are
   registered only when the corresponding package was configured in. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) {
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  PetscCall(PetscNewLog(B, &b));
  B->data = (void *)b;
  PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* Register type-specific implementations queried by name elsewhere in PETSc */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(0);
}
6699 
6700 /*@C
6701      MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6702          and "off-diagonal" part of the matrix in CSR format.
6703 
6704    Collective
6705 
6706    Input Parameters:
6707 +  comm - MPI communicator
6708 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
6709 .  n - This value should be the same as the local size used in creating the
6710        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
6711        calculated if N is given) For square matrices n is almost always m.
6712 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
6713 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
6714 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6715 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
6716 .   a - matrix values
6717 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6718 .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
6719 -   oa - matrix values
6720 
6721    Output Parameter:
6722 .   mat - the matrix
6723 
6724    Level: advanced
6725 
6726    Notes:
6727        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6728        must free the arrays once the matrix has been destroyed and not before.
6729 
6730        The i and j indices are 0 based
6731 
6732        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6733 
6734        This sets local rows and cannot be used to set off-processor values.
6735 
6736        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6737        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6738        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6739        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6740        keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6741        communication if it is known that only local entries will be set.
6742 
6743 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6744           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6745 @*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) {
  Mat_MPIAIJ *maij;

  PetscFunctionBegin;
  /* Basic sanity checks on the CSR inputs; both row-offset arrays must start at 0 */
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  /* The storage is supplied directly by the caller, so mark the matrix preallocated */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Wrap the caller's arrays (NOT copied) as the diagonal (A) and off-diagonal (B) blocks */
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));

  /* All entries are local by construction; temporarily disable off-process communication
     for the assembly, then restore the default and lock the nonzero pattern */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}
6773 
/* Context for MatMat products (AB, AtB, PtAP) computed through a series of backend
   SeqAIJ intermediate products; their values are finally gathered into the result
   matrix with MatSetValuesCOO() (see MatProductNumeric_MPIAIJBACKEND()) */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt   cp;    /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r;
  PetscScalar *bufa;
  Mat          P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i]; own[i+1]-own[i] is their count */
  PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i]; off[i+1]-off[i] is their count */
  PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;            /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;

  /* customization */
  PetscBool abmerge;    /* NOTE(review): presumably whether to merge B's diag/off-diag (cf. Bloc) -- confirm in symbolic phase */
  PetscBool P_oth_bind; /* NOTE(review): presumably whether to bind P_oth to CPU memory -- confirm in symbolic phase */
} MatMatMPIAIJBACKEND;
6804 
6805 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) {
6806   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
6807   PetscInt             i;
6808 
6809   PetscFunctionBegin;
6810   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
6811   PetscCall(PetscFree(mmdata->bufa));
6812   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
6813   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
6814   PetscCall(MatDestroy(&mmdata->P_oth));
6815   PetscCall(MatDestroy(&mmdata->Bloc));
6816   PetscCall(PetscSFDestroy(&mmdata->sf));
6817   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
6818   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
6819   PetscCall(PetscFree(mmdata->own[0]));
6820   PetscCall(PetscFree(mmdata->own));
6821   PetscCall(PetscFree(mmdata->off[0]));
6822   PetscCall(PetscFree(mmdata->off));
6823   PetscCall(PetscFree(mmdata));
6824   PetscFunctionReturn(0);
6825 }
6826 
6827 /* Copy selected n entries with indices in idx[] of A to v[].
6828    If idx is NULL, copy the whole data array of A to v[]
6829  */
6830 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) {
6831   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
6832 
6833   PetscFunctionBegin;
6834   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
6835   if (f) {
6836     PetscCall((*f)(A, n, idx, v));
6837   } else {
6838     const PetscScalar *vv;
6839 
6840     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
6841     if (n && idx) {
6842       PetscScalar    *w  = v;
6843       const PetscInt *oi = idx;
6844       PetscInt        j;
6845 
6846       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6847     } else {
6848       PetscCall(PetscArraycpy(v, vv, n));
6849     }
6850     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
6851   }
6852   PetscFunctionReturn(0);
6853 }
6854 
/* Numeric phase for backend MatMat products (AB, AtB, PtAP).

   Re-runs the numeric kernels of the stored intermediate products mp[], harvests
   their values -- split into on-process and off-process contributions through the
   own[]/off[] index arrays -- into coo_v/coo_w, gathers off-process contributions
   over mmdata->sf when needed, and inserts everything into C with MatSetValuesCOO(). */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) {
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o; /* n_d/n_o: running offsets into coo_v/coo_w */

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  mmdata->reusesym = PETSC_FALSE; /* the symbolic phase's values may be reused only once; later calls always update */

  /* Run the numeric phase of each intermediate product */
  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* Harvest the values of every non-temporary intermediate product */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i + 1] - mmdata->off[i]; /* number of off-process entries of mp[i] */

    if (mmdata->mptmp[i]) continue;
    if (noff) {
      PetscInt nown = mmdata->own[i + 1] - mmdata->own[i]; /* number of on-process entries of mp[i] */

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;

      /* No off-process entries: copy the whole value array of mp[i] */
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion: append gathered remote values after the local ones */
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
  }
  PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
  PetscFunctionReturn(0);
}
6898 
6899 /* Support for Pt * A, A * P, or Pt * A * P */
6900 #define MAX_NUMBER_INTERMEDIATE 4
/*
  MatProductSymbolic_MPIAIJBACKEND - symbolic phase of C = A*P, P^t*A, or P^t*A*P for
  backend (CUDA/Kokkos-capable) MPIAIJ matrix types.

  The parallel product is decomposed into up to MAX_NUMBER_INTERMEDIATE sequential
  products mp[], whose nonzeros are later assembled into C with MatSetValuesCOO()
  (see the companion numeric routine stored in C->ops->productnumeric).  For each
  intermediate matrix we record:
    rmapt[]/cmapt[] - how its local row/col indices map to global indices of C
                      (0: already global, 1: local + base offset, 2: table lookup)
    rmapa[]/cmapa[] - the lookup tables for map type 2
    mptmp[]         - whether the matrix is only a temporary of a multi-step product
  When some product rows belong to other ranks (hasoffproc), a PetscSF is built to
  ship those (i,j,v) triplets to their owners.
*/
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) {
  Mat_Product           *product = C->product;
  Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ            *a, *p;
  MatMatMPIAIJBACKEND   *mmdata;
  ISLocalToGlobalMapping P_oth_l2g = NULL;
  IS                     glob      = NULL;
  const char            *prefix;
  char                   pprefix[256];
  const PetscInt        *globidx, *P_oth_idx;
  PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
  PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
  PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE];  /* col/row map type for each Mat in mp[]. */
                                                                                          /* type-0: consecutive, start from 0; type-1: consecutive with */
                                                                                          /* a base offset; type-2: sparse with a local to global map table */
  const PetscInt        *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */

  MatProductType ptype;
  PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iskokk;
  PetscMPIInt    size;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  ptype = product->type;
  /* A^t*B with symmetric A is computed as A*B; remember we exploited the symmetry so it can be rechecked on reuse */
  if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
    ptype                                          = MATPRODUCT_AB;
    product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
  }
  /* determine local/global sizes of C and whether product values must be scattered to other ranks */
  switch (ptype) {
  case MATPRODUCT_AB:
    A          = product->A;
    P          = product->B;
    m          = A->rmap->n;
    n          = P->cmap->n;
    M          = A->rmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
    break;
  case MATPRODUCT_AtB:
    P          = product->A;
    A          = product->B;
    m          = P->cmap->n;
    n          = A->cmap->n;
    M          = P->cmap->N;
    N          = A->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  case MATPRODUCT_PtAP:
    A          = product->A;
    P          = product->B;
    m          = P->cmap->n;
    n          = P->cmap->n;
    M          = P->cmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
  if (size == 1) hasoffproc = PETSC_FALSE; /* uniprocessor: every product row is local */

  /* defaults */
  for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
    mp[i]    = NULL;
    mptmp[i] = PETSC_FALSE;
    rmapt[i] = -1;
    cmapt[i] = -1;
    rmapa[i] = NULL;
    cmapa[i] = NULL;
  }

  /* customization */
  PetscCall(PetscNew(&mmdata));
  mmdata->reusesym = product->api_user; /* record whether the product was requested through the old-style API (MatMatMult etc.) */
  if (ptype == MATPRODUCT_AB) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
      PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  } else if (ptype == MATPRODUCT_PtAP) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
      PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  }
  a = (Mat_MPIAIJ *)A->data;
  p = (Mat_MPIAIJ *)P->data;
  PetscCall(MatSetSizes(C, m, n, M, N));
  PetscCall(PetscLayoutSetUp(C->rmap));
  PetscCall(PetscLayoutSetUp(C->cmap));
  PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatGetOptionsPrefix(C, &prefix));

  /* build the chain of intermediate sequential products mp[0..cp-1] */
  cp = 0;
  switch (ptype) {
  case MATPRODUCT_AB: /* A * P */
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));

    /* A_diag * P_local (merged or not) */
    if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
      /* P is product->B */
      PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
      PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else { /* A_diag * P_diag and A_diag * P_off */
      PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 1;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = p->garray;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }

    /* A_off * P_other */
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;

  case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
    /* A is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
      PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else {
      PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = p->garray;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  case MATPRODUCT_PtAP:
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    /* P is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    /* loc(P)^t * A_diag * loc(P) */
    PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
    PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
    PetscCall(MatProductSetFill(mp[cp], product->fill));
    PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
    PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
    PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
    mp[cp]->product->api_user = product->api_user;
    PetscCall(MatProductSetFromOptions(mp[cp]));
    PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
    PetscCall(ISGetIndices(glob, &globidx));
    rmapt[cp] = 2;
    rmapa[cp] = globidx;
    cmapt[cp] = 2;
    cmapa[cp] = globidx;
    mptmp[cp] = PETSC_FALSE;
    cp++;
    if (mmdata->P_oth) {
      /* A_off * P_oth, a temporary (mptmp) used only as input to the next product */
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      mptmp[cp] = PETSC_TRUE;
      cp++;
      /* loc(P)^t * (A_off * P_oth) */
      PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  /* sanity check: a type-2 (sparse) row map implies rows may land on other ranks, which requires hasoffproc */
  if (size > 1)
    for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);

  PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
  for (i = 0; i < cp; i++) {
    mmdata->mp[i]    = mp[i];
    mmdata->mptmp[i] = mptmp[i];
  }
  mmdata->cp             = cp;
  C->product->data       = mmdata;
  C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
  C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;

  /* memory type */
  mmdata->mtype = PETSC_MEMTYPE_HOST;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
  if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
  else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;

  /* prepare coo coordinates for values insertion */

  /* count total nonzeros of those intermediate seqaij Mats
    ncoo_d:    # of nonzeros of matrices that do not have offproc entries
    ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
    ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
  */
  for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
    if (mptmp[cp]) continue;
    if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
      const PetscInt *rmap = rmapa[cp];
      const PetscInt  mr   = mp[cp]->rmap->n;
      const PetscInt  rs   = C->rmap->rstart;
      const PetscInt  re   = C->rmap->rend;
      const PetscInt *ii   = mm->i;
      for (i = 0; i < mr; i++) {
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i + 1] - ii[i];
        if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
        else ncoo_oown += nz;                  /* this row is local */
      }
    } else ncoo_d += mm->nz;
  }

  /*
    ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc

    ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.

    off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].

    off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
    own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
    so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.

    coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
    Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
  */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));

  /* gather (i,j) of nonzeros inserted by remote procs */
  if (hasoffproc) {
    PetscSF  msf;
    PetscInt ncoo2, *coo_i2, *coo_j2;

    PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
    PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
    PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */

    for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
      Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
      PetscInt   *idxoff = mmdata->off[cp];
      PetscInt   *idxown = mmdata->own[cp];
      if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
        const PetscInt *rmap = rmapa[cp];
        const PetscInt *cmap = cmapa[cp];
        const PetscInt *ii   = mm->i;
        PetscInt       *coi  = coo_i + ncoo_o;
        PetscInt       *coj  = coo_j + ncoo_o;
        const PetscInt  mr   = mp[cp]->rmap->n;
        const PetscInt  rs   = C->rmap->rstart;
        const PetscInt  re   = C->rmap->rend;
        const PetscInt  cs   = C->cmap->rstart;
        for (i = 0; i < mr; i++) {
          const PetscInt *jj = mm->j + ii[i];
          const PetscInt  gr = rmap[i];
          const PetscInt  nz = ii[i + 1] - ii[i];
          if (gr < rs || gr >= re) { /* this is an offproc row */
            for (j = ii[i]; j < ii[i + 1]; j++) {
              *coi++    = gr;
              *idxoff++ = j;
            }
            if (!cmapt[cp]) { /* already global */
              for (j = 0; j < nz; j++) *coj++ = jj[j];
            } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
              for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
            } else { /* offdiag */
              for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
            }
            ncoo_o += nz;
          } else { /* this is a local row */
            for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
          }
        }
      }
      mmdata->off[cp + 1] = idxoff;
      mmdata->own[cp + 1] = idxown;
    }

    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
    PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
    PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
    ncoo = ncoo_d + ncoo_oown + ncoo2;
    PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscFree2(coo_i, coo_j));
    /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
    PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
    coo_i = coo_i2;
    coo_j = coo_j2;
  } else { /* no offproc values insertion */
    ncoo = ncoo_d;
    PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));

    /* an empty SF is still created so that the numeric phase can use a uniform code path */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(mmdata->sf));
  }
  mmdata->hasoffproc = hasoffproc;

  /* gather (i,j) of nonzeros inserted locally */
  for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
    PetscInt       *coi  = coo_i + ncoo_d;
    PetscInt       *coj  = coo_j + ncoo_d;
    const PetscInt *jj   = mm->j;
    const PetscInt *ii   = mm->i;
    const PetscInt *cmap = cmapa[cp];
    const PetscInt *rmap = rmapa[cp];
    const PetscInt  mr   = mp[cp]->rmap->n;
    const PetscInt  rs   = C->rmap->rstart;
    const PetscInt  re   = C->rmap->rend;
    const PetscInt  cs   = C->cmap->rstart;

    if (mptmp[cp]) continue;
    if (rmapt[cp] == 1) { /* consecutive rows */
      /* fill coo_i */
      for (i = 0; i < mr; i++) {
        const PetscInt gr = i + rs;
        for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
      }
      /* fill coo_j */
      if (!cmapt[cp]) { /* type-0, already global */
        PetscCall(PetscArraycpy(coj, jj, mm->nz));
      } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
        for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
      } else {                                            /* type-2, local to global for sparse columns */
        for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
      }
      ncoo_d += mm->nz;
    } else if (rmapt[cp] == 2) { /* sparse rows */
      for (i = 0; i < mr; i++) {
        const PetscInt *jj = mm->j + ii[i];
        const PetscInt  gr = rmap[i];
        const PetscInt  nz = ii[i + 1] - ii[i];
        if (gr >= rs && gr < re) { /* local rows */
          for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
          if (!cmapt[cp]) { /* type-0, already global */
            for (j = 0; j < nz; j++) *coj++ = jj[j];
          } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
            for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
          } else { /* type-2, local to global for sparse columns */
            for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
          }
          ncoo_d += nz;
        }
      }
    }
  }
  if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
  PetscCall(ISDestroy(&glob));
  if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
  PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
  /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
  PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));

  /* preallocate with COO data */
  PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
  PetscCall(PetscFree2(coo_i, coo_j));
  PetscFunctionReturn(0);
}
7390 
/*
  MatProductSetFromOptions_MPIAIJBACKEND - install MatProductSymbolic_MPIAIJBACKEND for the
  supported product types (AB, AtB, PtAP), unless the user requested the CPU path through
  the various -*_backend_cpu options; otherwise fall back to the plain MPIAIJ implementation.
*/
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) {
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match  = PETSC_FALSE;
  PetscBool usecpu = PETSC_FALSE;
#else
  PetscBool match = PETSC_TRUE; /* without device support there is nothing to opt out of */
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat, 1);
#if defined(PETSC_HAVE_DEVICE)
  /* the backend path is used only when A and B have the same type and neither is bound to the CPU */
  if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
  if (match) { /* we can always fallback to the CPU if requested */
    /* the option name depends on the product type and on whether the old-style API (MatMatMult etc.) was used */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    default: break;
    }
    match = (PetscBool)!usecpu;
  }
#endif
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP: mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; break;
    default: break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(0);
}
7456 
7457 /*
7458    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7459 
7460    n - the number of block indices in cc[]
7461    cc - the block indices (must be large enough to contain the indices)
7462 */
7463 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) {
7464   PetscInt        cnt = -1, nidx, j;
7465   const PetscInt *idx;
7466 
7467   PetscFunctionBegin;
7468   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7469   if (nidx) {
7470     cnt     = 0;
7471     cc[cnt] = idx[0] / bs;
7472     for (j = 1; j < nidx; j++) {
7473       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7474     }
7475   }
7476   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7477   *n = cnt + 1;
7478   PetscFunctionReturn(0);
7479 }
7480 
7481 /*
7482     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7483 
7484     ncollapsed - the number of block indices
7485     collapsed - the block indices (must be large enough to contain the indices)
7486 */
7487 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) {
7488   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7489 
7490   PetscFunctionBegin;
7491   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7492   for (i = start + 1; i < start + bs; i++) {
7493     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7494     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7495     cprevtmp = cprev;
7496     cprev    = merged;
7497     merged   = cprevtmp;
7498   }
7499   *ncollapsed = nprev;
7500   if (collapsed) *collapsed = cprev;
7501   PetscFunctionReturn(0);
7502 }
7503 
7504 /* -------------------------------------------------------------------------- */
7505 /*
7506  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7507 
 Input Parameters:
+ Amat - matrix
. symmetrize - make the result symmetric
- scale - scale with diagonal
7512 
7513  Output Parameter:
7514  . a_Gmat - output scalar graph >= 0
7515 
7516  */
7517 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, Mat *a_Gmat) {
7518   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7519   MPI_Comm  comm;
7520   Mat       Gmat;
7521   PetscBool ismpiaij, isseqaij;
7522   Mat       a, b, c;
7523   MatType   jtype;
7524 
7525   PetscFunctionBegin;
7526   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7527   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7528   PetscCall(MatGetSize(Amat, &MM, &NN));
7529   PetscCall(MatGetBlockSize(Amat, &bs));
7530   nloc = (Iend - Istart) / bs;
7531 
7532   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7533   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7534   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7535 
7536   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7537   /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
7538      implementation */
7539   if (bs > 1) {
7540     PetscCall(MatGetType(Amat, &jtype));
7541     PetscCall(MatCreate(comm, &Gmat));
7542     PetscCall(MatSetType(Gmat, jtype));
7543     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7544     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7545     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7546       PetscInt  *d_nnz, *o_nnz;
7547       MatScalar *aa, val, AA[4096];
7548       PetscInt  *aj, *ai, AJ[4096], nc;
7549       if (isseqaij) {
7550         a = Amat;
7551         b = NULL;
7552       } else {
7553         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7554         a             = d->A;
7555         b             = d->B;
7556       }
7557       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7558       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7559       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7560         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz, nmax = 0;
7561         const PetscInt *cols;
7562         for (PetscInt brow = 0, jj, ok = 1, j0; brow < nloc * bs; brow += bs) { // block rows
7563           PetscCall(MatGetRow(c, brow, &jj, &cols, NULL));
7564           nnz[brow / bs] = jj / bs;
7565           if (jj % bs) ok = 0;
7566           if (cols) j0 = cols[0];
7567           else j0 = -1;
7568           PetscCall(MatRestoreRow(c, brow, &jj, &cols, NULL));
7569           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
7570           for (PetscInt ii = 1; ii < bs && nnz[brow / bs]; ii++) { // check for non-dense blocks
7571             PetscCall(MatGetRow(c, brow + ii, &jj, &cols, NULL));
7572             if (jj % bs) ok = 0;
7573             if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0;
7574             if (nnz[brow / bs] != jj / bs) ok = 0;
7575             PetscCall(MatRestoreRow(c, brow + ii, &jj, &cols, NULL));
7576           }
7577           if (!ok) {
7578             PetscCall(PetscFree2(d_nnz, o_nnz));
7579             goto old_bs;
7580           }
7581         }
7582         PetscCheck(nmax < 4096, PETSC_COMM_SELF, PETSC_ERR_USER, "Buffer %" PetscInt_FMT " too small 4096.", nmax);
7583       }
7584       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7585       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7586       PetscCall(PetscFree2(d_nnz, o_nnz));
7587       // diag
7588       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
7589         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
7590         ai               = aseq->i;
7591         n                = ai[brow + 1] - ai[brow];
7592         aj               = aseq->j + ai[brow];
7593         for (int k = 0; k < n; k += bs) {        // block columns
7594           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
7595           val        = 0;
7596           for (int ii = 0; ii < bs; ii++) { // rows in block
7597             aa = aseq->a + ai[brow + ii] + k;
7598             for (int jj = 0; jj < bs; jj++) {         // columns in block
7599               val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7600             }
7601           }
7602           AA[k / bs] = val;
7603         }
7604         grow = Istart / bs + brow / bs;
7605         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES));
7606       }
7607       // off-diag
7608       if (ismpiaij) {
7609         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
7610         const PetscScalar *vals;
7611         const PetscInt    *cols, *garray = aij->garray;
7612         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
7613         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
7614           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
7615           for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
7616             AA[k / bs] = 0;
7617             AJ[cidx]   = garray[cols[k]] / bs;
7618           }
7619           nc = ncols / bs;
7620           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
7621           for (int ii = 0; ii < bs; ii++) { // rows in block
7622             PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7623             for (int k = 0; k < ncols; k += bs) {
7624               for (int jj = 0; jj < bs; jj++) { // cols in block
7625                 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7626               }
7627             }
7628             PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7629           }
7630           grow = Istart / bs + brow / bs;
7631           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES));
7632         }
7633       }
7634       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7635       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7636     } else {
7637       const PetscScalar *vals;
7638       const PetscInt    *idx;
7639       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
7640     old_bs:
7641       /*
7642        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7643        */
7644       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
7645       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7646       if (isseqaij) {
7647         PetscInt max_d_nnz;
7648         /*
7649          Determine exact preallocation count for (sequential) scalar matrix
7650          */
7651         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
7652         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7653         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7654         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7655         PetscCall(PetscFree3(w0, w1, w2));
7656       } else if (ismpiaij) {
7657         Mat             Daij, Oaij;
7658         const PetscInt *garray;
7659         PetscInt        max_d_nnz;
7660         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
7661         /*
7662          Determine exact preallocation count for diagonal block portion of scalar matrix
7663          */
7664         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
7665         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7666         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7667         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7668         PetscCall(PetscFree3(w0, w1, w2));
7669         /*
7670          Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
7671          */
7672         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7673           o_nnz[jj] = 0;
7674           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
7675             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7676             o_nnz[jj] += ncols;
7677             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7678           }
7679           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
7680         }
7681       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
7682       /* get scalar copy (norms) of matrix */
7683       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7684       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7685       PetscCall(PetscFree2(d_nnz, o_nnz));
7686       for (Ii = Istart; Ii < Iend; Ii++) {
7687         PetscInt dest_row = Ii / bs;
7688         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
7689         for (jj = 0; jj < ncols; jj++) {
7690           PetscInt    dest_col = idx[jj] / bs;
7691           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
7692           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
7693         }
7694         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
7695       }
7696       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7697       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7698     }
7699   } else {
7700     /* TODO GPU: optimization proposal, each class provides fast implementation of this
7701      procedure via MatAbs API */
7702     /* just copy scalar matrix & abs() */
7703     PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
7704     if (isseqaij) {
7705       a = Gmat;
7706       b = NULL;
7707     } else {
7708       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
7709       a             = d->A;
7710       b             = d->B;
7711     }
7712     /* abs */
7713     for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7714       MatInfo      info;
7715       PetscScalar *avals;
7716       PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
7717       PetscCall(MatSeqAIJGetArray(c, &avals));
7718       for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
7719       PetscCall(MatSeqAIJRestoreArray(c, &avals));
7720     }
7721   }
7722   if (symmetrize) {
7723     PetscBool isset, issym;
7724     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
7725     if (!isset || !issym) {
7726       Mat matTrans;
7727       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
7728       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
7729       PetscCall(MatDestroy(&matTrans));
7730     }
7731     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
7732   } else {
7733     PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
7734   }
7735   if (scale) {
7736     /* scale c for all diagonal values = 1 or -1 */
7737     Vec diag;
7738     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
7739     PetscCall(MatGetDiagonal(Gmat, diag));
7740     PetscCall(VecReciprocal(diag));
7741     PetscCall(VecSqrtAbs(diag));
7742     PetscCall(MatDiagonalScale(Gmat, diag, diag));
7743     PetscCall(VecDestroy(&diag));
7744   }
7745   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
7746   *a_Gmat = Gmat;
7747   PetscFunctionReturn(0);
7748 }
7749 
/*
  MatFilter_AIJ - Create a copy of an (MPI/Seq)AIJ graph matrix that keeps only the
  entries whose magnitude exceeds a threshold.

  Input Parameters:
+ Gmat    - the (MPI/Seq)AIJ matrix to filter; its values are typically the nonnegative
            edge weights produced by the graph-creation routine above
- vfilter - drop threshold; an entry is kept only if |Re(value)| > vfilter

  Output Parameter:
. filteredG - new matrix of the same type and parallel layout as Gmat containing the
              surviving entries (caller destroys)

  Notes:
  Preallocation of the result is an upper bound (the full row lengths of the diagonal
  and off-diagonal blocks of Gmat), so no count can be exceeded while inserting.
  The ORIGINAL values are copied into the result; the absolute value is used only for
  the keep/drop test.
*/
PETSC_INTERN PetscErrorCode MatFilter_AIJ(Mat Gmat, PetscReal vfilter, Mat *filteredG) {
  PetscInt           Istart, Iend, ncols, nnz0, nnz1, NN, MM, nloc;
  Mat                tGmat;
  MPI_Comm           comm;
  const PetscScalar *vals;
  const PetscInt    *idx;
  PetscInt          *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols = 0;
  MatScalar         *AA; // this is checked in graph
  PetscBool          isseqaij;
  Mat                a, b, c;
  MatType            jtype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Gmat, &comm));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATSEQAIJ, &isseqaij));
  PetscCall(MatGetType(Gmat, &jtype));
  PetscCall(MatCreate(comm, &tGmat));
  PetscCall(MatSetType(tGmat, jtype));

  /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold?
               Also, if the matrix is symmetric, can we skip this
               operation? It can be very expensive on large matrices. */

  // global sizes
  PetscCall(MatGetSize(Gmat, &MM, &NN));
  PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend));
  nloc = Iend - Istart;
  PetscCall(PetscMalloc2(nloc, &d_nnz, nloc, &o_nnz));
  /* a = local (diagonal-block) part, b = off-diagonal part (NULL when sequential);
     garray maps local off-diagonal column indices to global column indices */
  if (isseqaij) {
    a = Gmat;
    b = NULL;
  } else {
    Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
    a             = d->A;
    b             = d->B;
    garray        = d->garray;
  }
  /* Determine upper bound on non-zeros needed in new filtered matrix */
  for (PetscInt row = 0; row < nloc; row++) {
    PetscCall(MatGetRow(a, row, &ncols, NULL, NULL));
    d_nnz[row] = ncols;
    if (ncols > maxcols) maxcols = ncols; /* track widest row to size the insert buffers */
    PetscCall(MatRestoreRow(a, row, &ncols, NULL, NULL));
  }
  if (b) {
    for (PetscInt row = 0; row < nloc; row++) {
      PetscCall(MatGetRow(b, row, &ncols, NULL, NULL));
      o_nnz[row] = ncols;
      if (ncols > maxcols) maxcols = ncols;
      PetscCall(MatRestoreRow(b, row, &ncols, NULL, NULL));
    }
  }
  PetscCall(MatSetSizes(tGmat, nloc, nloc, MM, MM));
  PetscCall(MatSetBlockSizes(tGmat, 1, 1));
  /* only the call matching tGmat's type takes effect; the other is a no-op */
  PetscCall(MatSeqAIJSetPreallocation(tGmat, 0, d_nnz));
  PetscCall(MatMPIAIJSetPreallocation(tGmat, 0, d_nnz, 0, o_nnz));
  /* all insertions below target locally owned rows, so no stash/communication needed */
  PetscCall(MatSetOption(tGmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(PetscFree2(d_nnz, o_nnz));
  //
  PetscCall(PetscMalloc2(maxcols, &AA, maxcols, &AJ));
  nnz0 = nnz1 = 0; /* nnz0 = entries scanned, nnz1 = entries kept (for the info message) */
  for (c = a, kk = 0; c && kk < 2; c = b, kk++) { /* first pass over diag part, second over off-diag (if any) */
    for (PetscInt row = 0, grow = Istart, ncol_row, jj; row < nloc; row++, grow++) {
      PetscCall(MatGetRow(c, row, &ncols, &idx, &vals));
      for (ncol_row = jj = 0; jj < ncols; jj++, nnz0++) {
        PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
        if (PetscRealPart(sv) > vfilter) {
          nnz1++;
          PetscInt cid = idx[jj] + Istart; //diag
          if (c != a) cid = garray[idx[jj]]; /* off-diag: translate local column to global via garray */
          AA[ncol_row] = vals[jj];           /* keep the original value, not its absolute value */
          AJ[ncol_row] = cid;
          ncol_row++;
        }
      }
      PetscCall(MatRestoreRow(c, row, &ncols, &idx, &vals));
      PetscCall(MatSetValues(tGmat, 1, &grow, ncol_row, AJ, AA, INSERT_VALUES));
    }
  }
  PetscCall(PetscFree2(AA, AJ));
  PetscCall(MatAssemblyBegin(tGmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(tGmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatPropagateSymmetryOptions(Gmat, tGmat)); /* Normal Mat options are not relevant ? */

  PetscCall(PetscInfo(tGmat, "\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", (!nnz0) ? 1. : 100. * (double)nnz1 / (double)nnz0, (double)vfilter, (!nloc) ? 1. : (double)nnz0 / (double)nloc, MM, (int)maxcols));

  *filteredG = tGmat;
  PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view"));
  PetscFunctionReturn(0);
}
7840 
/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so can be used in void function */
/* Identical to PetscCallVoid, except it assigns to *_ierr */
/* NOTE: the Fortran stub below returns void and reports errors through its
   PetscErrorCode *_ierr argument, so the standard PetscCall/SETERRQ (which
   `return` an error code) cannot be used; these variants store the code in
   *_ierr and bail out with a plain `return`. They are #undef'd again at the
   end of this file. */
#undef PetscCall
#define PetscCall(...) \
  do { \
    PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
    if (PetscUnlikely(ierr_msv_mpiaij)) { \
      *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
      return; \
    } \
  } while (0)

#undef SETERRQ
#define SETERRQ(comm, ierr, ...) \
  do { \
    *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
    return; \
  } while (0)

/* Map the C symbol to the Fortran compiler's name-mangling convention
   (all-caps, or no trailing underscore when the compiler adds none) */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
/*
  matsetvaluesmpiaij_ - Fortran-callable fast path for MatSetValues() on a MATMPIAIJ matrix.

  All arguments arrive as pointers (Fortran pass-by-reference); errors are reported
  through *_ierr via the redefined PetscCall/SETERRQ macros above this function.

  This duplicates the logic of MatSetValues_MPIAIJ: locally owned rows are inserted
  directly into the diagonal (A) or off-diagonal (B) sequential blocks via the
  MatSetValues_SeqAIJ_{A,B}_Private macros; rows owned by other processes are stashed
  for communication during assembly (unless donotstash is set).
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) {
  Mat         mat = *mmat;
  PetscInt    m = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    /* The MatSetValues_SeqAIJ_{A,B}_Private macros expand in place and reference
       these exact local names (aimax, ai, ailen, aj, aa, bimax, bi, ...); do not
       rename or reorder them. */
    Mat                    A     = aij->A;
    Mat_SeqAIJ            *a     = (Mat_SeqAIJ *)A->data;
    PetscInt              *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar             *aa;
    PetscBool              ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat                    B                 = aij->B;
    Mat_SeqAIJ            *b                 = (Mat_SeqAIJ *)B->data;
    PetscInt              *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar             *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue; /* negative row indices are silently skipped (standard MatSetValues contract) */
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* Locally owned row: set up binary-search state for both the A (diag) and
           B (off-diag) rows, as required by the _Private macros below */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m]; /* column-oriented (Fortran) layout */
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* column falls in the diagonal block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* off-diagonal block: translate the global column to B's compressed
               local numbering via the colmap (only present once assembled) */
            if (mat->was_assembled) {
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscTableFind(aij->colmap, in[j] + 1, &col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
                /* new off-diagonal column not in colmap: disassemble so B reverts to
                   global column numbering and the entry can be inserted */
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col      = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B        = aij->B;
                b        = (Mat_SeqAIJ *)B->data;
                bimax    = b->imax;
                bi       = b->i;
                bilen    = b->ilen;
                bj       = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* row owned by another process: stash for communication at assembly time */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}
7984 
/* Undefining these here since they were redefined from their original definition above! No
 * other PETSc functions should be defined past this point, as it is impossible to recover the
 * original definitions */
#undef PetscCall
#undef SETERRQ