xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 28d911a8077fa87b9a4a10e8dbd0dedeb22bcffd)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
10 {
11   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
12 
13   PetscFunctionBegin;
14   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
15   PetscCall(MatStashDestroy_Private(&mat->stash));
16   PetscCall(VecDestroy(&aij->diag));
17   PetscCall(MatDestroy(&aij->A));
18   PetscCall(MatDestroy(&aij->B));
19 #if defined(PETSC_USE_CTABLE)
20   PetscCall(PetscHMapIDestroy(&aij->colmap));
21 #else
22   PetscCall(PetscFree(aij->colmap));
23 #endif
24   PetscCall(PetscFree(aij->garray));
25   PetscCall(VecDestroy(&aij->lvec));
26   PetscCall(VecScatterDestroy(&aij->Mvctx));
27   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
28   PetscCall(PetscFree(aij->ld));
29 
30   PetscCall(PetscFree(mat->data));
31 
32   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
33   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
34 
35   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
36   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
37   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
38   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
39   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
40   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
41   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
42   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
43   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
44   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
45 #if defined(PETSC_HAVE_CUDA)
46   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
47 #endif
48 #if defined(PETSC_HAVE_HIP)
49   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
50 #endif
51 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
52   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
53 #endif
54   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
55 #if defined(PETSC_HAVE_ELEMENTAL)
56   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
57 #endif
58 #if defined(PETSC_HAVE_SCALAPACK)
59   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
60 #endif
61 #if defined(PETSC_HAVE_HYPRE)
62   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
63   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
64 #endif
65   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
66   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
69   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
71 #if defined(PETSC_HAVE_MKL_SPARSE)
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
73 #endif
74   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
75   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
76   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
77   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
78   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
79   PetscFunctionReturn(PETSC_SUCCESS);
80 }
81 
82 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and  MatAssemblyEnd_MPI_Hash() */
83 #define TYPE AIJ
84 #define TYPE_AIJ
85 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
86 #undef TYPE
87 #undef TYPE_AIJ
88 
89 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
90 {
91   Mat B;
92 
93   PetscFunctionBegin;
94   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
95   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
96   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
97   PetscCall(MatDestroy(&B));
98   PetscFunctionReturn(PETSC_SUCCESS);
99 }
100 
101 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
102 {
103   Mat B;
104 
105   PetscFunctionBegin;
106   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
107   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
108   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
109   PetscFunctionReturn(PETSC_SUCCESS);
110 }
111 
/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
  `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
  for communicators controlling multiple processes.  It is recommended that you call both of
  the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

  Level: beginner

  Developer Note:
    Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`, and the type also automatically switches over to use inodes when
   enough exist.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
M*/
132 
133 /*MC
134    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
135 
136    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
137    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
138    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
139   for communicators controlling multiple processes.  It is recommended that you call both of
140   the above preallocation routines for simplicity.
141 
142    Options Database Key:
143 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`
144 
145   Level: beginner
146 
147 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
148 M*/
149 
150 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
151 {
152   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
153 
154   PetscFunctionBegin;
155 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
156   A->boundtocpu = flg;
157 #endif
158   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
159   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
160 
161   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
162    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
163    * to differ from the parent matrix. */
164   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
165   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
166 
167   PetscFunctionReturn(PETSC_SUCCESS);
168 }
169 
170 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
171 {
172   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
173 
174   PetscFunctionBegin;
175   if (mat->A) {
176     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
177     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
178   }
179   PetscFunctionReturn(PETSC_SUCCESS);
180 }
181 
/* Builds an IS (global numbering) of the locally-owned rows that contain at
   least one stored entry with a nonzero value, in either the diagonal (A) or
   off-diagonal (B) block.  If no rank has an all-zero row, *keptrows is left
   NULL on every rank (nothing needs filtering). */
static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data; /* diagonal block */
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data; /* off-diagonal block */
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  /* First pass: cnt = number of local rows whose stored values are all
     exactly zero (or that have no stored entries at all) */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i]; /* stored entries of row i in A */
    nb = ib[i + 1] - ib[i]; /* stored entries of row i in B */
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1; /* row has a nonzero value: it will be kept */
    }
    bb = bav ? bav + ib[i] : NULL;
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++; /* every stored value of this row is zero */
  ok1:;
  }
  PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    /* no rank found an empty row: leave *keptrows NULL */
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  /* Second pass: collect the global indices of the rows with a nonzero value */
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i; /* record in global numbering */
        goto ok2;
      }
    }
    bb = bav ? bav + ib[i] : NULL;
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(PETSC_SUCCESS);
}
250 
251 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
252 {
253   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
254   PetscBool   cong;
255 
256   PetscFunctionBegin;
257   PetscCall(MatHasCongruentLayouts(Y, &cong));
258   if (Y->assembled && cong) {
259     PetscCall(MatDiagonalSet(aij->A, D, is));
260   } else {
261     PetscCall(MatDiagonalSet_Default(Y, D, is));
262   }
263   PetscFunctionReturn(PETSC_SUCCESS);
264 }
265 
266 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
267 {
268   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
269   PetscInt    i, rstart, nrows, *rows;
270 
271   PetscFunctionBegin;
272   *zrows = NULL;
273   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
274   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
275   for (i = 0; i < nrows; i++) rows[i] += rstart;
276   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
277   PetscFunctionReturn(PETSC_SUCCESS);
278 }
279 
/* Computes a per-(global-)column reduction of the matrix entries: 1-, 2-, or
   infinity-norm, or sum/mean of real or imaginary parts.  `reductions` must
   have length n (global column count) and holds the same result on all ranks
   after the global reduction. */
static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray; /* garray: local off-diagonal column -> global column */
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work)); /* one zero-initialized slot per global column */
  /* NOTE(review): these get/restore pairs are used only for their side effect --
     presumably to make the host-side a_aij->a / b_aij->a values current before the
     raw accesses below; confirm against MatSeqAIJGetArrayRead() semantics */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    /* accumulate |a_ij|^2 per column; square root applied after the Allreduce */
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  /* combine the per-rank partial results: max for the infinity norm, sum otherwise */
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m; /* divide by global row count to get the mean */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
325 
326 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
327 {
328   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
329   IS              sis, gis;
330   const PetscInt *isis, *igis;
331   PetscInt        n, *iis, nsis, ngis, rstart, i;
332 
333   PetscFunctionBegin;
334   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
335   PetscCall(MatFindNonzeroRows(a->B, &gis));
336   PetscCall(ISGetSize(gis, &ngis));
337   PetscCall(ISGetSize(sis, &nsis));
338   PetscCall(ISGetIndices(sis, &isis));
339   PetscCall(ISGetIndices(gis, &igis));
340 
341   PetscCall(PetscMalloc1(ngis + nsis, &iis));
342   PetscCall(PetscArraycpy(iis, igis, ngis));
343   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
344   n = ngis + nsis;
345   PetscCall(PetscSortRemoveDupsInt(&n, iis));
346   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
347   for (i = 0; i < n; i++) iis[i] += rstart;
348   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
349 
350   PetscCall(ISRestoreIndices(sis, &isis));
351   PetscCall(ISRestoreIndices(gis, &igis));
352   PetscCall(ISDestroy(&sis));
353   PetscCall(ISDestroy(&gis));
354   PetscFunctionReturn(PETSC_SUCCESS);
355 }
356 
/*
  Local utility routine that creates a mapping from the global column
  numbers to the local numbers in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable, at
  a slightly higher hash-table cost; without it, it is not scalable (each process
  holds an order-N integer array) but is fast to access.
*/
364 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
365 {
366   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
367   PetscInt    n   = aij->B->cmap->n, i;
368 
369   PetscFunctionBegin;
370   PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
371 #if defined(PETSC_USE_CTABLE)
372   PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
373   for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
374 #else
375   PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
376   for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
377 #endif
378   PetscFunctionReturn(PETSC_SUCCESS);
379 }
380 
/* Insert or add `value` at local (row, col) of the diagonal block A, keeping each
   row's column indices sorted.  The search-state locals (rp1, ap1, nrow1, low1,
   high1, lastcol1, t, _i, N, nonew, ignorezeroentries, ...) must exist in the
   caller's scope; (orow, ocol) are the global indices, used only in error
   messages.  Control jumps to a_noinsert once the entry is handled or skipped. */
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol1) low1 = 0; /* columns not ascending: restart the search window */ \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { /* binary search down to a short range, then scan */ \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { /* entry already present: update it in place */ \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { /* zeros off the diagonal may be dropped; diagonal zeros are kept */ \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { /* new nonzero locations are silently ignored */ \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)
426 
/* Counterpart of MatSetValues_SeqAIJ_A_Private for the off-diagonal block B,
   using the B-block search state (rp2, ap2, nrow2, low2, high2, lastcol2, ...).
   The zero-skip test carries no row != col guard because no entry of B lies on
   the block diagonal. */
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol2) low2 = 0; /* columns not ascending: restart the search window */ \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { /* binary search down to a short range, then scan */ \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { /* entry already present: update it in place */ \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { /* new nonzero locations are silently ignored */ \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  } while (0)
471 
/* Overwrites the values of one locally-owned row without changing its sparsity
   pattern.  v[] supplies the row's stored values in global column order:
   off-diagonal entries left of the diagonal block, then the diagonal-block
   entries, then the off-diagonal entries to the right. */
static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray                         = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A (so that the row ownership start
     also bounds the diagonal block's column range) */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag; /* convert global row to local row index */
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    /* l counts B entries whose global column precedes the diagonal block */
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  if (l) {
    /* copy the first l values of v into the left part of the B row */
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    /* the remaining values of v fill the rest of the B row */
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
509 
/* Inserts or adds a logically dense block of values v (m rows im[], n columns
   in[], layout per aij->roworiented).  Locally-owned rows are routed to the
   diagonal (A) or off-diagonal (B) block via the SeqAIJ insertion macros;
   off-process rows are stashed for communication during assembly.  Negative
   row/column indices are ignored. */
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A     = aij->A;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B                 = aij->B;
  Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are ignored by convention */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally-owned row: initialize the per-row search state that the
         MatSetValues_SeqAIJ_{A,B}_Private macros expect */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj ? aj + ai[row] : NULL;
      ap1      = aa ? aa + ai[row] : NULL;
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj ? bj + bi[row] : NULL;
      ap2      = ba ? ba + bi[row] : NULL;
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* column falls in the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue; /* negative columns are ignored by convention */
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            /* after assembly B uses compacted local column numbering; translate via colmap */
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              /* unknown column and disassembly disallowed: skip or error per nonew */
              if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* before first assembly B uses global column numbering */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      /* off-process row: stash for the communication phase of MatAssembly */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v ? v + i * n : NULL, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v ? v + i : NULL, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}
618 
/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
624 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
625 {
626   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
627   Mat         A      = aij->A; /* diagonal part of the matrix */
628   Mat         B      = aij->B; /* off-diagonal part of the matrix */
629   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
630   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
631   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
632   PetscInt   *ailen = a->ilen, *aj = a->j;
633   PetscInt   *bilen = b->ilen, *bj = b->j;
634   PetscInt    am          = aij->A->rmap->n, j;
635   PetscInt    diag_so_far = 0, dnz;
636   PetscInt    offd_so_far = 0, onz;
637 
638   PetscFunctionBegin;
639   /* Iterate over all rows of the matrix */
640   for (j = 0; j < am; j++) {
641     dnz = onz = 0;
642     /*  Iterate over all non-zero columns of the current row */
643     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
644       /* If column is in the diagonal */
645       if (mat_j[col] >= cstart && mat_j[col] < cend) {
646         aj[diag_so_far++] = mat_j[col] - cstart;
647         dnz++;
648       } else { /* off-diagonal entries */
649         bj[offd_so_far++] = mat_j[col];
650         onz++;
651       }
652     }
653     ailen[j] = dnz;
654     bilen[j] = onz;
655   }
656   PetscFunctionReturn(PETSC_SUCCESS);
657 }
658 
/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
{
  Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
  Mat          A    = aij->A; /* diagonal part of the matrix */
  Mat          B    = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
  Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
  PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend; /* ownership range of the diagonal block's columns */
  PetscInt    *ailen = a->ilen, *aj = a->j;
  PetscInt    *bilen = b->ilen, *bj = b->j;
  PetscInt     am          = aij->A->rmap->n, j;
  PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
  PetscScalar *aa = a->a, *ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd     = full_offd_i[j];
    rowstart_diag     = full_diag_i[j];
    /*  Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        /* shift to a column index local to the diagonal block */
        aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag + dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        /* off-diagonal entries keep their global column index at this stage */
        bj[rowstart_offd + onz_row] = mat_j[col];
        ba[rowstart_offd + onz_row] = mat_a[col];
        onz_row++;
      }
    }
    /* record how many entries of each local row are actually in use */
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
706 
/* Extract an m-by-n block of values into the row-major array v. Requested rows must be
   owned by this rank; negative row/column indices are skipped (their v entries are left
   untouched). Columns outside the diagonal block are looked up through the colmap; a
   global column with no locally stored entry yields 0.0. */
static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
    row = idxm[i] - rstart; /* local row index */
    for (j = 0; j < n; j++) {
      if (idxn[j] < 0) continue; /* negative column */
      PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
      if (idxn[j] >= cstart && idxn[j] < cend) {
        /* column falls in the diagonal block: direct local lookup */
        col = idxn[j] - cstart;
        PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
      } else {
        /* column falls in the off-diagonal block: translate global -> local via colmap,
           which stores local index + 1 so that 0 can mean "absent" */
        if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
        PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
        col--;
#else
        col = aij->colmap[idxn[j]] - 1;
#endif
        /* col < 0 or a garray mismatch means this global column is not stored locally */
        if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
        else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
740 
741 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
742 {
743   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
744   PetscInt    nstash, reallocs;
745 
746   PetscFunctionBegin;
747   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
748 
749   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
750   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
751   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
752   PetscFunctionReturn(PETSC_SUCCESS);
753 }
754 
/* Complete assembly: drain the off-process stash into the local blocks, assemble the
   sequential diagonal (A) and off-diagonal (B) blocks, perform the collective
   disassemble/reassemble bookkeeping, and reduce the global nonzero state. */
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* drain every stash message; each delivers parallel arrays of (row, col, val) */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  /* cached row buffers and the cached diagonal are stale after assembly */
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
833 
834 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
835 {
836   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
837 
838   PetscFunctionBegin;
839   PetscCall(MatZeroEntries(l->A));
840   PetscCall(MatZeroEntries(l->B));
841   PetscFunctionReturn(PETSC_SUCCESS);
842 }
843 
/* Zero the (global) rows listed in rows[], optionally placing diag on the diagonal.
   When both x and b are given, b is overwritten with diag*x in the zeroed rows
   (requires congruent row/column layouts). Collective: the change in nonzero
   state is reduced over the matrix communicator at the end. */
static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)A->data;
  PetscObjectState sA, sB;
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* remember the blocks' nonzero states so pattern changes can be detected below */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* congruent layouts: the diagonal entry of each zeroed row lies in the diagonal block */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    nnwA, nnwB;
    PetscBool   nnzA, nnzB;

    /* save the nonew flags; they are temporarily cleared below so that inserting the
       diagonal entries may allocate new nonzeros, then restored afterwards */
    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue; /* rows beyond the column range have no diagonal entry */
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    /* diag == 0: just zero the rows of both blocks */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(PETSC_SUCCESS);
}
917 
/* Zero the listed rows AND the matching columns, placing diag on the diagonal of the
   zeroed rows. Rows may be specified on any rank; a star forest routes each one to its
   owner. If x and b are given, b is corrected for the eliminated column contributions. */
static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n;
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1; /* -1 marks "not zeroed" until the reduction below */
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off-diagonal part of matrix */
  /* build lmask: a ghost-column vector that is nonzero exactly for zeroed columns */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    /* gather the ghost values of x needed for the b correction below */
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off-diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex; /* maps compressed row slot -> actual local row */
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i]; /* note: n is reused here as the per-row entry count */
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* entry sits in a zeroed column: move its contribution to b, then zero it */
          if (b) bb[*ridx] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1035 
1036 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1037 {
1038   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1039   PetscInt    nt;
1040   VecScatter  Mvctx = a->Mvctx;
1041 
1042   PetscFunctionBegin;
1043   PetscCall(VecGetLocalSize(xx, &nt));
1044   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
1045   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1046   PetscUseTypeMethod(a->A, mult, xx, yy);
1047   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1048   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1049   PetscFunctionReturn(PETSC_SUCCESS);
1050 }
1051 
1052 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1053 {
1054   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1055 
1056   PetscFunctionBegin;
1057   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1058   PetscFunctionReturn(PETSC_SUCCESS);
1059 }
1060 
1061 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1062 {
1063   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1064   VecScatter  Mvctx = a->Mvctx;
1065 
1066   PetscFunctionBegin;
1067   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1068   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1069   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1070   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1071   PetscFunctionReturn(PETSC_SUCCESS);
1072 }
1073 
1074 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1075 {
1076   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1077 
1078   PetscFunctionBegin;
1079   /* do nondiagonal part */
1080   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1081   /* do local part */
1082   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1083   /* add partial results together */
1084   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1085   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1086   PetscFunctionReturn(PETSC_SUCCESS);
1087 }
1088 
/* Test whether Bmat equals the transpose of Amat to tolerance tol. First runs the
   cheap local test on the diagonal blocks (with a collective AND so all ranks agree);
   only if that passes are the off-diagonal blocks gathered and compared. */
static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
  Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme;
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); /* uniprocessor: no off-diagonal block */

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  /* notme = all indices outside this rank's ownership range [first, last) */
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  /* NOTE(review): notme holds N - last + first entries but this loop runs to M;
     consistent only when M == N -- confirm this routine assumes square matrices */
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  /* extract A(Me, Notme) and B(Notme, Me); transposality requires these to be transposes */
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1128 
/* A matrix is symmetric iff it equals its own transpose, so delegate to the
   transpose test with both arguments set to A. */
static PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f)
{
  PetscFunctionBegin;
  PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1135 
1136 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1137 {
1138   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1139 
1140   PetscFunctionBegin;
1141   /* do nondiagonal part */
1142   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1143   /* do local part */
1144   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1145   /* add partial results together */
1146   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1147   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1148   PetscFunctionReturn(PETSC_SUCCESS);
1149 }
1150 
1151 /*
1152   This only works correctly for square matrices where the subblock A->A is the
1153    diagonal block
1154 */
1155 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1156 {
1157   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1158 
1159   PetscFunctionBegin;
1160   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1161   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1162   PetscCall(MatGetDiagonal(a->A, v));
1163   PetscFunctionReturn(PETSC_SUCCESS);
1164 }
1165 
1166 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1167 {
1168   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1169 
1170   PetscFunctionBegin;
1171   PetscCall(MatScale(a->A, aa));
1172   PetscCall(MatScale(a->B, aa));
1173   PetscFunctionReturn(PETSC_SUCCESS);
1174 }
1175 
1176 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1177 {
1178   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1179   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1180   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1181   const PetscInt    *garray = aij->garray;
1182   const PetscScalar *aa, *ba;
1183   PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
1184   PetscInt64         nz, hnz;
1185   PetscInt          *rowlens;
1186   PetscInt          *colidxs;
1187   PetscScalar       *matvals;
1188   PetscMPIInt        rank;
1189 
1190   PetscFunctionBegin;
1191   PetscCall(PetscViewerSetUp(viewer));
1192 
1193   M  = mat->rmap->N;
1194   N  = mat->cmap->N;
1195   m  = mat->rmap->n;
1196   rs = mat->rmap->rstart;
1197   cs = mat->cmap->rstart;
1198   nz = A->nz + B->nz;
1199 
1200   /* write matrix header */
1201   header[0] = MAT_FILE_CLASSID;
1202   header[1] = M;
1203   header[2] = N;
1204   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1205   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1206   if (rank == 0) {
1207     if (hnz > PETSC_MAX_INT) header[3] = PETSC_MAX_INT;
1208     else header[3] = (PetscInt)hnz;
1209   }
1210   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1211 
1212   /* fill in and store row lengths  */
1213   PetscCall(PetscMalloc1(m, &rowlens));
1214   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1215   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1216   PetscCall(PetscFree(rowlens));
1217 
1218   /* fill in and store column indices */
1219   PetscCall(PetscMalloc1(nz, &colidxs));
1220   for (cnt = 0, i = 0; i < m; i++) {
1221     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1222       if (garray[B->j[jb]] > cs) break;
1223       colidxs[cnt++] = garray[B->j[jb]];
1224     }
1225     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1226     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1227   }
1228   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1229   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1230   PetscCall(PetscFree(colidxs));
1231 
1232   /* fill in and store nonzero values */
1233   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1234   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1235   PetscCall(PetscMalloc1(nz, &matvals));
1236   for (cnt = 0, i = 0; i < m; i++) {
1237     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1238       if (garray[B->j[jb]] > cs) break;
1239       matvals[cnt++] = ba[jb];
1240     }
1241     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1242     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1243   }
1244   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1245   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1246   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1247   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1248   PetscCall(PetscFree(matvals));
1249 
1250   /* write block size option to the viewer's .info file */
1251   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1252   PetscFunctionReturn(PETSC_SUCCESS);
1253 }
1254 
1255 #include <petscdraw.h>
/* View implementation for ASCII, draw, binary and socket viewers. Special ASCII
   formats (load balance, info, info-detail) are handled per rank; otherwise the
   whole matrix is gathered onto rank 0 and viewed as a sequential matrix. */
static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt       rank = aij->rank, size = aij->size;
  PetscBool         isdraw, iascii, isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max nonzeros per rank */
      PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < (PetscInt)size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank nonzero statistics, printed in rank order via synchronized prints */
      MatInfo   info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isbinary) {
    if (size == 1) {
      /* uniprocessor: the diagonal block IS the whole matrix */
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (iascii && size == 1) {
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow, iscol;

    /* rank 0 requests every row/column; other ranks request none */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
    /*  The commented code uses MatCreateSubMatrices instead */
    /*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
    if (rank == 0) {
       PetscCall(PetscObjectReference((PetscObject)AA[0]));
       A    = AA[0];
       Av   = AA[0];
    }
    PetscCall(MatDestroySubMatrices(1,&AA));
*/
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqAIJ(Av, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1380 
1381 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1382 {
1383   PetscBool iascii, isdraw, issocket, isbinary;
1384 
1385   PetscFunctionBegin;
1386   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1387   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1388   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1389   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1390   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1391   PetscFunctionReturn(PETSC_SUCCESS);
1392 }
1393 
/* SOR/SSOR relaxation for MPIAIJ matrices.

   Only "local" variants are supported: each outer iteration folds the
   off-diagonal block B into the right-hand side (bb1 = bb - B*x, using ghost
   values gathered through mat->Mvctx) and then runs `lits` local sweeps on the
   diagonal block mat->A. True parallel (global) SOR is rejected with an error.

   matin  - the MPIAIJ matrix
   bb     - right-hand side
   omega  - relaxation factor
   flag   - MatSORType flags selecting the sweep variant
   fshift - diagonal shift
   its    - outer (parallel) iterations; lits - inner (local) iterations
   xx     - solution vector, updated in place */
static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec         bb1 = NULL; /* work vector holding the modified rhs bb - B*x */
  PetscBool   hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    /* apply only the upper triangular part of the local diagonal block */
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* bb1 is needed unless a single iteration starts from a zero initial guess
     (and no Eisenstat trick is requested); ~flag & bit tests "bit not set" */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first iteration with x == 0: B*x term vanishes, sweep on bb directly */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      /* gather ghost values of xx needed by the off-diagonal block */
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    /* backward local sweep from a zero guess */
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    if (!mat->diag) {
      /* lazily cache the diagonal of the full matrix for the D*x product */
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    /* bb1 = bb + ((omega-2)/omega) * D*x */
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  /* propagate any zero-pivot/factorization error detected in the local sweeps */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1490 
/* Produce B = the row/column permutation of A described by index sets rowp and
   colp. The inverse permutations are computed with PetscSF reductions so each
   rank learns the new global destination of its rows (rdest), owned columns
   (cdest) and ghost columns (gcdest); the permuted matrix is then preallocated
   exactly and filled with MatSetValues. */
static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL;
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  /* work is sized for both row and column passes; rdest/cdest receive the
     permuted destinations of the local rows/columns */
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  /* broadcast the permuted destinations of the owned columns to the ghosts */
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* Count diagonal (dnnz) and off-diagonal (onnz) entries per permuted row;
     tdnnz/tonnz receive the counts at the destination rows */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* ship the per-row counts to whichever rank owns the destination rows */
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1596 
1597 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1598 {
1599   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1600 
1601   PetscFunctionBegin;
1602   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1603   if (ghosts) *ghosts = aij->garray;
1604   PetscFunctionReturn(PETSC_SUCCESS);
1605 }
1606 
1607 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1608 {
1609   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1610   Mat            A = mat->A, B = mat->B;
1611   PetscLogDouble isend[5], irecv[5];
1612 
1613   PetscFunctionBegin;
1614   info->block_size = 1.0;
1615   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1616 
1617   isend[0] = info->nz_used;
1618   isend[1] = info->nz_allocated;
1619   isend[2] = info->nz_unneeded;
1620   isend[3] = info->memory;
1621   isend[4] = info->mallocs;
1622 
1623   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1624 
1625   isend[0] += info->nz_used;
1626   isend[1] += info->nz_allocated;
1627   isend[2] += info->nz_unneeded;
1628   isend[3] += info->memory;
1629   isend[4] += info->mallocs;
1630   if (flag == MAT_LOCAL) {
1631     info->nz_used      = isend[0];
1632     info->nz_allocated = isend[1];
1633     info->nz_unneeded  = isend[2];
1634     info->memory       = isend[3];
1635     info->mallocs      = isend[4];
1636   } else if (flag == MAT_GLOBAL_MAX) {
1637     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1638 
1639     info->nz_used      = irecv[0];
1640     info->nz_allocated = irecv[1];
1641     info->nz_unneeded  = irecv[2];
1642     info->memory       = irecv[3];
1643     info->mallocs      = irecv[4];
1644   } else if (flag == MAT_GLOBAL_SUM) {
1645     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1646 
1647     info->nz_used      = irecv[0];
1648     info->nz_allocated = irecv[1];
1649     info->nz_unneeded  = irecv[2];
1650     info->memory       = irecv[3];
1651     info->mallocs      = irecv[4];
1652   }
1653   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1654   info->fill_ratio_needed = 0;
1655   info->factor_mallocs    = 0;
1656   PetscFunctionReturn(PETSC_SUCCESS);
1657 }
1658 
/* Set a matrix option on an MPIAIJ matrix. Options that affect local storage
   or assembly are forwarded to both sequential blocks a->A and a->B; a few are
   recorded on this level, and purely informational flags are no-ops here. */
PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* these require the blocks to already exist, then apply to both */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg; /* also remembered here for the parallel MatSetValues path */

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg; /* when set, values destined for other ranks are dropped instead of stashed */
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1712 
/* Return one locally owned row of an MPIAIJ matrix with globally sorted column
   indices. The row is assembled by merging the diagonal block A (columns in
   [cstart, cend)) with the off-diagonal block B, whose local columns are mapped
   to global ones through mat->garray. Work buffers mat->rowvalues/rowindices
   are allocated lazily and sized for the longest local row; they are reused
   until the matrix is destroyed, so MatRestoreRow need not free anything. */
PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
  PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p;

  PetscFunctionBegin;
  /* only one row may be "checked out" at a time (buffers are shared) */
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    max = 1, tmp;
    for (i = 0; i < matin->rmap->n; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  /* request from A and B only the parts (columns/values) the caller wants */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1; /* number of B entries whose global column precedes cstart */
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries left of the diagonal block come first */
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
        } else {
          /* imark was not computed above because values were not requested */
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        /* A columns are local; shift them to global with cstart */
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1796 
1797 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1798 {
1799   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1800 
1801   PetscFunctionBegin;
1802   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1803   aij->getrowactive = PETSC_FALSE;
1804   PetscFunctionReturn(PETSC_SUCCESS);
1805 }
1806 
/* Compute a norm of an MPIAIJ matrix. On one rank the computation is delegated
   to the sequential diagonal block; otherwise the Frobenius, 1- (max column
   sum) and infinity- (max row sum) norms are assembled from the A and B blocks
   and combined with an MPI reduction. The 2-norm is not supported. */
static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
  PetscInt         i, j, cstart = mat->cmap->rstart;
  PetscReal        sum = 0.0;
  const MatScalar *v, *amata, *bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    PetscCall(MatNorm(aij->A, type, norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both blocks, reduce, then take the square root */
      v = amata;
      for (i = 0; i < amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v = bmata;
      for (i = 0; i < bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = aij->garray;
      /* tmp accumulates the partial column sums for ALL global columns */
      PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
      *norm = 0.0;
      v     = amata;
      jj    = amat->j;
      for (j = 0; j < amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); /* A columns are local, shift by cstart */
        v++;
      }
      v  = bmata;
      jj = bmat->j;
      for (j = 0; j < bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); /* B columns map to global via garray */
        v++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      /* rows are fully local: A and B contributions share the same local row j */
      for (j = 0; j < aij->A->rmap->n; j++) {
        v   = amata + amat->i[j];
        sum = 0.0;
        for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        v = bmata + bmat->i[j];
        for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1884 
/* Transpose an MPIAIJ matrix. For MAT_INITIAL_MATRIX (or in-place) the result
   is preallocated exactly by counting column occurrences locally and reducing
   the off-diagonal counts to the owning ranks through a PetscSF. The diagonal
   block is transposed in place with MatTranspose (all writes local); the
   off-diagonal block is scattered with MatSetValues, transposing row/column
   roles (ncol rows, 1 column per call). */
static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
  Mat              B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  ma = A->rmap->n;
  na = A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt            *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode         *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    /* B has A's column layout for rows and row layout for columns */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    B = *matout;
    /* preallocation of the reused matrix must already be exact */
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ *)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];

  /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  /* translate the compressed local column indices of B to global indices */
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    ncol = bi[i + 1] - bi[i];
    /* insert row i of a->B as column `row` of the transpose */
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    if (pbv) pbv += ncol;
    if (cols_tmp) cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place transpose: replace A's guts with B's */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1978 
1979 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
1980 {
1981   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1982   Mat         a = aij->A, b = aij->B;
1983   PetscInt    s1, s2, s3;
1984 
1985   PetscFunctionBegin;
1986   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1987   if (rr) {
1988     PetscCall(VecGetLocalSize(rr, &s1));
1989     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1990     /* Overlap communication with computation. */
1991     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1992   }
1993   if (ll) {
1994     PetscCall(VecGetLocalSize(ll, &s1));
1995     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1996     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1997   }
1998   /* scale  the diagonal block */
1999   PetscUseTypeMethod(a, diagonalscale, ll, rr);
2000 
2001   if (rr) {
2002     /* Do a scatter end and then right scale the off-diagonal block */
2003     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
2004     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
2005   }
2006   PetscFunctionReturn(PETSC_SUCCESS);
2007 }
2008 
2009 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2010 {
2011   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2012 
2013   PetscFunctionBegin;
2014   PetscCall(MatSetUnfactored(a->A));
2015   PetscFunctionReturn(PETSC_SUCCESS);
2016 }
2017 
2018 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2019 {
2020   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2021   Mat         a, b, c, d;
2022   PetscBool   flg;
2023 
2024   PetscFunctionBegin;
2025   a = matA->A;
2026   b = matA->B;
2027   c = matB->A;
2028   d = matB->B;
2029 
2030   PetscCall(MatEqual(a, c, &flg));
2031   if (flg) PetscCall(MatEqual(b, d, &flg));
2032   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2033   PetscFunctionReturn(PETSC_SUCCESS);
2034 }
2035 
2036 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2037 {
2038   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2039   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2040 
2041   PetscFunctionBegin;
2042   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2043   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2044     /* because of the column compression in the off-processor part of the matrix a->B,
2045        the number of columns in a->B and b->B may be different, hence we cannot call
2046        the MatCopy() directly on the two parts. If need be, we can provide a more
2047        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2048        then copying the submatrices */
2049     PetscCall(MatCopy_Basic(A, B, str));
2050   } else {
2051     PetscCall(MatCopy(a->A, b->A, str));
2052     PetscCall(MatCopy(a->B, b->B, str));
2053   }
2054   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2055   PetscFunctionReturn(PETSC_SUCCESS);
2056 }
2057 
2058 /*
2059    Computes the number of nonzeros per row needed for preallocation when X and Y
2060    have different nonzero structure.
2061 */
2062 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2063 {
2064   PetscInt i, j, k, nzx, nzy;
2065 
2066   PetscFunctionBegin;
2067   /* Set the number of nonzeros in the new matrix */
2068   for (i = 0; i < m; i++) {
2069     const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i];
2070     nzx    = xi[i + 1] - xi[i];
2071     nzy    = yi[i + 1] - yi[i];
2072     nnz[i] = 0;
2073     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2074       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2075       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2076       nnz[i]++;
2077     }
2078     for (; k < nzy; k++) nnz[i]++;
2079   }
2080   PetscFunctionReturn(PETSC_SUCCESS);
2081 }
2082 
2083 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2084 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2085 {
2086   PetscInt    m = Y->rmap->N;
2087   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2088   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2089 
2090   PetscFunctionBegin;
2091   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2092   PetscFunctionReturn(PETSC_SUCCESS);
2093 }
2094 
2095 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2096 {
2097   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;
2098 
2099   PetscFunctionBegin;
2100   if (str == SAME_NONZERO_PATTERN) {
2101     PetscCall(MatAXPY(yy->A, a, xx->A, str));
2102     PetscCall(MatAXPY(yy->B, a, xx->B, str));
2103   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2104     PetscCall(MatAXPY_Basic(Y, a, X, str));
2105   } else {
2106     Mat       B;
2107     PetscInt *nnz_d, *nnz_o;
2108 
2109     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
2110     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
2111     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2112     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2113     PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
2114     PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
2115     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
2116     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
2117     PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
2118     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2119     PetscCall(MatHeaderMerge(Y, &B));
2120     PetscCall(PetscFree(nnz_d));
2121     PetscCall(PetscFree(nnz_o));
2122   }
2123   PetscFunctionReturn(PETSC_SUCCESS);
2124 }
2125 
2126 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2127 
2128 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2129 {
2130   PetscFunctionBegin;
2131   if (PetscDefined(USE_COMPLEX)) {
2132     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2133 
2134     PetscCall(MatConjugate_SeqAIJ(aij->A));
2135     PetscCall(MatConjugate_SeqAIJ(aij->B));
2136   }
2137   PetscFunctionReturn(PETSC_SUCCESS);
2138 }
2139 
2140 static PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2141 {
2142   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2143 
2144   PetscFunctionBegin;
2145   PetscCall(MatRealPart(a->A));
2146   PetscCall(MatRealPart(a->B));
2147   PetscFunctionReturn(PETSC_SUCCESS);
2148 }
2149 
2150 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2151 {
2152   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2153 
2154   PetscFunctionBegin;
2155   PetscCall(MatImaginaryPart(a->A));
2156   PetscCall(MatImaginaryPart(a->B));
2157   PetscFunctionReturn(PETSC_SUCCESS);
2158 }
2159 
2160 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2161 {
2162   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2163   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2164   PetscScalar       *va, *vv;
2165   Vec                vB, vA;
2166   const PetscScalar *vb;
2167 
2168   PetscFunctionBegin;
2169   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
2170   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2171 
2172   PetscCall(VecGetArrayWrite(vA, &va));
2173   if (idx) {
2174     for (i = 0; i < m; i++) {
2175       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2176     }
2177   }
2178 
2179   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
2180   PetscCall(PetscMalloc1(m, &idxb));
2181   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2182 
2183   PetscCall(VecGetArrayWrite(v, &vv));
2184   PetscCall(VecGetArrayRead(vB, &vb));
2185   for (i = 0; i < m; i++) {
2186     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2187       vv[i] = vb[i];
2188       if (idx) idx[i] = a->garray[idxb[i]];
2189     } else {
2190       vv[i] = va[i];
2191       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2192     }
2193   }
2194   PetscCall(VecRestoreArrayWrite(vA, &vv));
2195   PetscCall(VecRestoreArrayWrite(vA, &va));
2196   PetscCall(VecRestoreArrayRead(vB, &vb));
2197   PetscCall(PetscFree(idxb));
2198   PetscCall(VecDestroy(&vA));
2199   PetscCall(VecDestroy(&vB));
2200   PetscFunctionReturn(PETSC_SUCCESS);
2201 }
2202 
/*
  For each local row, compute the entry of smallest absolute value over the whole global
  row, combining the diagonal block mat->A with the off-diagonal block mat->B.

  v   - output vector with each row's minimum |value|
  idx - optional; on output the global column index attaining that minimum

  B stores only explicit nonzeros in a compressed column numbering (mat->garray), so any
  B row that is not completely dense contains an implicit 0.0, which is automatically the
  smallest possible magnitude; the bulk of the code locates the global column of the
  first such implicit zero.
*/
static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* local B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry: B is empty, delegate to the diagonal block */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* this process owns no columns: every row minimum is an implicit 0.0 with no column to report */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = 0.0;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zero, seed with the first explicit entry */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so the minimum magnitude is the implicit 0.0; find its global column */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): relies on cmap[] (garray) being sorted increasing - confirm */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over this process's own (diagonal-block) columns */
        }
      }
    }

    /* scan this B row's explicit entries for anything smaller in magnitude */
    for (j = 0; j < ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* combine the diagonal-block and off-diagonal results; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r]; /* diagonal-block column -> global column */
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2318 
/*
  For each local row, compute the minimum entry (by real part) over the whole global row,
  combining the diagonal block mat->A with the off-diagonal block mat->B.

  v   - output vector with each row's minimum value
  idx - optional; on output the global column index attaining that minimum

  A sparse B row contains an implicit 0.0, which caps the off-diagonal minimum at 0.0;
  the code must therefore locate the global column of the first such implicit zero.
*/
static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* local B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry: B is empty, delegate to the diagonal block */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* this process owns no columns: report the identity of min (PETSC_MAX_REAL) and no column */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zero, seed with the first explicit entry */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so an implicit 0.0 caps the off-diagonal minimum at 0.0; find its global column */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): relies on cmap[] (garray) being sorted increasing - confirm */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over this process's own (diagonal-block) columns */
        }
      }
    }

    /* scan this B row's explicit entries for anything smaller (by real part) */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* combine the diagonal-block and off-diagonal results; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r]; /* diagonal-block column -> global column */
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2434 
/*
  For each local row, compute the maximum entry (by real part) over the whole global row,
  combining the diagonal block mat->A with the off-diagonal block mat->B.

  v   - output vector with each row's maximum value
  idx - optional; on output the global column index attaining that maximum

  A sparse B row contains an implicit 0.0, which guarantees the off-diagonal maximum is
  at least 0.0; the code must therefore locate the global column of the first such
  implicit zero.
*/
static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* local B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry: B is empty, delegate to the diagonal block */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* this process owns no columns: report the identity of max (PETSC_MIN_REAL) and no column */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zero, seed with the first explicit entry */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher; find the first implicit zero's column */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): relies on cmap[] (garray) being sorted increasing - confirm */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over this process's own (diagonal-block) columns */
        }
      }
    }

    /* scan this B row's explicit entries for anything larger (by real part) */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* combine the diagonal-block and off-diagonal results; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r]; /* diagonal-block column -> global column */
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2550 
2551 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2552 {
2553   Mat *dummy;
2554 
2555   PetscFunctionBegin;
2556   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2557   *newmat = *dummy;
2558   PetscCall(PetscFree(dummy));
2559   PetscFunctionReturn(PETSC_SUCCESS);
2560 }
2561 
2562 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2563 {
2564   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2565 
2566   PetscFunctionBegin;
2567   PetscCall(MatInvertBlockDiagonal(a->A, values));
2568   A->factorerrortype = a->A->factorerrortype;
2569   PetscFunctionReturn(PETSC_SUCCESS);
2570 }
2571 
2572 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2573 {
2574   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2575 
2576   PetscFunctionBegin;
2577   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2578   PetscCall(MatSetRandom(aij->A, rctx));
2579   if (x->assembled) {
2580     PetscCall(MatSetRandom(aij->B, rctx));
2581   } else {
2582     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2583   }
2584   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2585   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2586   PetscFunctionReturn(PETSC_SUCCESS);
2587 }
2588 
2589 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2590 {
2591   PetscFunctionBegin;
2592   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2593   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2594   PetscFunctionReturn(PETSC_SUCCESS);
2595 }
2596 
2597 /*@
2598   MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2599 
2600   Not Collective
2601 
2602   Input Parameter:
2603 . A - the matrix
2604 
2605   Output Parameter:
2606 . nz - the number of nonzeros
2607 
2608   Level: advanced
2609 
2610 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2611 @*/
2612 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2613 {
2614   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2615   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2616   PetscBool   isaij;
2617 
2618   PetscFunctionBegin;
2619   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
2620   PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
2621   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2622   PetscFunctionReturn(PETSC_SUCCESS);
2623 }
2624 
/*@
  MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

  Collective

  Input Parameters:
+ A  - the matrix
- sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  /* dispatches to the MPIAIJ implementation when composed; silently a no-op for other matrix types */
  PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2644 
2645 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
2646 {
2647   PetscBool sc = PETSC_FALSE, flg;
2648 
2649   PetscFunctionBegin;
2650   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2651   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2652   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2653   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2654   PetscOptionsHeadEnd();
2655   PetscFunctionReturn(PETSC_SUCCESS);
2656 }
2657 
2658 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
2659 {
2660   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
2661   Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;
2662 
2663   PetscFunctionBegin;
2664   if (!Y->preallocated) {
2665     PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
2666   } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
2667     PetscInt nonew = aij->nonew;
2668     PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
2669     aij->nonew = nonew;
2670   }
2671   PetscCall(MatShift_Basic(Y, a));
2672   PetscFunctionReturn(PETSC_SUCCESS);
2673 }
2674 
2675 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2676 {
2677   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2678 
2679   PetscFunctionBegin;
2680   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2681   PetscCall(MatMissingDiagonal(a->A, missing, d));
2682   if (d) {
2683     PetscInt rstart;
2684     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2685     *d += rstart;
2686   }
2687   PetscFunctionReturn(PETSC_SUCCESS);
2688 }
2689 
2690 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2691 {
2692   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2693 
2694   PetscFunctionBegin;
2695   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2696   PetscFunctionReturn(PETSC_SUCCESS);
2697 }
2698 
2699 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
2700 {
2701   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2702 
2703   PetscFunctionBegin;
2704   PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2705   PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2706   PetscFunctionReturn(PETSC_SUCCESS);
2707 }
2708 
/* Function table for MATMPIAIJ. The initializer is positional: each slot must line up with
   the corresponding member of struct _MatOps, so entries may only be replaced in place
   (the numbered comments mark every fifth slot). NULL slots fall back to generic Mat behavior. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIAIJ};
2861 
2862 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2863 {
2864   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2865 
2866   PetscFunctionBegin;
2867   PetscCall(MatStoreValues(aij->A));
2868   PetscCall(MatStoreValues(aij->B));
2869   PetscFunctionReturn(PETSC_SUCCESS);
2870 }
2871 
2872 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2873 {
2874   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2875 
2876   PetscFunctionBegin;
2877   PetscCall(MatRetrieveValues(aij->A));
2878   PetscCall(MatRetrieveValues(aij->B));
2879   PetscFunctionReturn(PETSC_SUCCESS);
2880 }
2881 
/* Preallocate the two sequential blocks (diagonal A, off-diagonal B) of an MPIAIJ matrix.
   d_nz/d_nnz give per-row nonzero counts for A, o_nz/o_nnz for B; any previously built
   column map and communication objects are discarded and will be rebuilt at assembly. */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* if the matrix was in hash-table insertion mode, restore the operations table saved in b->cops */
  if (B->hash_active) {
    B->ops[0]      = b->cops;
    B->hash_active = PETSC_FALSE;
  }
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

  /* drop any stale global-to-local column map and off-process communication data */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* (re)create the sequential off-diagonal block b->B; on one process it gets
     zero columns since there is no off-process part */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));

  /* (re)create the sequential diagonal block b->A with the local row/column sizes */
  PetscCall(MatDestroy(&b->A));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
  PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
  PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
  PetscCall(MatSetType(b->A, MATSEQAIJ));

  /* forward the user-supplied preallocation to the two sequential blocks */
  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2924 
/* Reset an MPIAIJ matrix to its freshly-preallocated state: the sequential
   diagonal (A) and off-diagonal (B) blocks are reset in place, while the cached
   column map and communication objects are destroyed (rebuilt at next assembly). */
static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

  /* drop the global-to-local column map and the off-process scatter data */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* reset the two sequential blocks, keeping their preallocation pattern */
  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2950 
/* Duplicate an MPIAIJ matrix. The layouts are shared by reference; the column
   map, off-diagonal column list, local work vector, scatter context, and the two
   sequential blocks (A, B) are copied per cpvalues. */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat         mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  /* create an empty matrix of the same type and parallel sizes as matin */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  /* copy state flags; the insertion mode is deliberately reset */
  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* MatGetRow work arrays are not copied; start with none allocated */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  /* share the layouts by reference instead of copying them */
  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  /* deep-copy the global-to-local column map, if it has been built */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
  } else a->colmap = NULL;
  /* deep-copy the off-diagonal column list; len + 1 avoids a zero-length allocation */
  if (oldmat->garray) {
    PetscInt len;
    len = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len + 1, &a->garray));
    if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
  if (oldmat->Mvctx) PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
  /* duplicate the sequential diagonal (A) and off-diagonal (B) blocks */
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
3006 
3007 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3008 {
3009   PetscBool isbinary, ishdf5;
3010 
3011   PetscFunctionBegin;
3012   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3013   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3014   /* force binary viewer to load .info file if it has not yet done so */
3015   PetscCall(PetscViewerSetUp(viewer));
3016   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3017   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3018   if (isbinary) {
3019     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3020   } else if (ishdf5) {
3021 #if defined(PETSC_HAVE_HDF5)
3022     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3023 #else
3024     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3025 #endif
3026   } else {
3027     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3028   }
3029   PetscFunctionReturn(PETSC_SUCCESS);
3030 }
3031 
/* Load an MPIAIJ matrix from a PETSc binary viewer: read the header, set up the
   layouts, then read row lengths, column indices, and values collectively and
   hand them to MatMPIAIJSetPreallocationCSR(). Collective on the viewer's comm. */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1]; /* global number of rows */
  N  = header[2]; /* global number of columns */
  nz = header[3]; /* total number of nonzeros; negative flags a special on-disk format */
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already (PETSC_DECIDE/PETSC_DETERMINE are negative) */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  /* prefix sum converts per-row lengths into CSR row offsets */
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  /* nz == PETSC_MAX_INT presumably marks an unknown/overflowed count — then skip the consistency check (TODO confirm against the writer) */
  if (nz != PETSC_MAX_INT) {
    PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
    PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  }

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3084 
3085 /* Not scalable because of ISAllGather() unless getting all columns. */
3086 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3087 {
3088   IS          iscol_local;
3089   PetscBool   isstride;
3090   PetscMPIInt lisstride = 0, gisstride;
3091 
3092   PetscFunctionBegin;
3093   /* check if we are grabbing all columns*/
3094   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3095 
3096   if (isstride) {
3097     PetscInt start, len, mstart, mlen;
3098     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3099     PetscCall(ISGetLocalSize(iscol, &len));
3100     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3101     if (mstart == start && mlen - mstart == len) lisstride = 1;
3102   }
3103 
3104   PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3105   if (gisstride) {
3106     PetscInt N;
3107     PetscCall(MatGetSize(mat, NULL, &N));
3108     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3109     PetscCall(ISSetIdentity(iscol_local));
3110     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3111   } else {
3112     PetscInt cbs;
3113     PetscCall(ISGetBlockSize(iscol, &cbs));
3114     PetscCall(ISAllGather(iscol, &iscol_local));
3115     PetscCall(ISSetBlockSize(iscol_local, cbs));
3116   }
3117 
3118   *isseq = iscol_local;
3119   PetscFunctionReturn(PETSC_SUCCESS);
3120 }
3121 
3122 /*
3123  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3124  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3125 
3126  Input Parameters:
3127 +   mat - matrix
.   isrow - parallel row index set; its local indices are a subset of local rows of `mat`,
           i.e., mat->rstart <= isrow[i] < mat->rend
3130 -   iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
3131            i.e., mat->cstart <= iscol[i] < mat->cend
3132 
3133  Output Parameters:
3134 +   isrow_d - sequential row index set for retrieving mat->A
.   iscol_d - sequential column index set for retrieving mat->A
3136 .   iscol_o - sequential column index set for retrieving mat->B
3137 -   garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
3138  */
static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
{
  Vec             x, cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray, *cmaparray;
  PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
  Mat             B    = a->B;
  Vec             lvec = a->lvec, lcmap;
  PetscInt        i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm        comm;
  VecScatter      Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x;
     -1 acts as a "not selected" sentinel distinguishable from real (nonnegative) indices */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0));
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices: inclusive scan minus the local count gives this rank's
     exclusive prefix, i.e., the global offset of its first selected column */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
    idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d; it takes ownership of idx (PETSC_OWN_POINTER) and inherits iscol's block size */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i));
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d: shift global row indices to local (rstart-based) numbering */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices: entries still at the -1 sentinel were not selected */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  for (i = 0; i < Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  /* caller takes ownership of cmap1 and must PetscFree() it */
  *garray = cmap1;

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3235 
3236 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat         M = NULL;
  MPI_Comm    comm;
  IS          iscol_d, isrow_d, iscol_o;
  Mat         Asub = NULL, Bsub = NULL;
  PetscInt    n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat (composed there on the initial call) */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    /* only touch the off-diagonal block when some off-process columns were selected */
    if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));

    /* Create submatrix M; Asub and Bsub become owned by M (Bsub is destroyed inside) */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ *)M->data;

    PetscCall(ISGetLocalSize(iscol_o, &BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt        i, j, *idx_new, *subgarray = asub->garray;
      PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));

      /* keep only the iscol_o entries whose global column survives in the condensed B,
         by merging the sorted lists subgarray (condensed) and garray (original) */
      PetscCall(PetscMalloc1(n, &idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o, &idx));
      for (i = 0; i < n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o, &idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request;
       composing transfers a reference so the local handles can be destroyed */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3328 
/* Top-level submatrix extraction for MPIAIJ: dispatches to specialized
   implementations when isrow (and possibly iscol) have the same processor
   distribution as mat, otherwise falls back to the nonscalable path that
   gathers iscol onto every process. Collective: every rank must reach the
   same branch, which the MPIU_Allreduce below guarantees. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* on reuse, which path was taken originally is recorded by what was composed on *newmat */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat:
       its min/max must lie within this rank's ownership range (empty sets trivially match) */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol, &i, &j));
      PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* combine both flags across ranks with logical AND so all ranks agree on the path */
    PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
    PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
        PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol, &i));
        PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);

        PetscCall(ISSorted(iscol_local, &sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
        /* unsorted iscol_local falls through to the general path below */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    /* iscol_local may already exist from the sorted-check branch above */
    if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
  }

  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* compose iscol_local on newmat so a later MAT_REUSE_MATRIX call can retrieve it */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3428 
3429 /*@C
3430   MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3431   and "off-diagonal" part of the matrix in CSR format.
3432 
3433   Collective
3434 
3435   Input Parameters:
3436 + comm   - MPI communicator
3437 . A      - "diagonal" portion of matrix
3438 . B      - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3439 - garray - global index of `B` columns
3440 
3441   Output Parameter:
3442 . mat - the matrix, with input `A` as its local diagonal matrix
3443 
3444   Level: advanced
3445 
3446   Notes:
3447   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3448 
3449   `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore.
3450 
3451 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3452 @*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat: sum of each rank's diagonal-block columns */
  PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat; *mat takes over the caller's reference */
  maij->A = A;

  /* translate B's column indices in place from local numbering to global via garray */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew wraps B's existing i/j/a arrays
     (no copy), so the two matrices temporarily share storage */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* transfer ownership of the shared arrays from B to Bnew: clear B's free
     flags so MatDestroy(&B) below does not release them ... */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  /* ... and set Bnew's flags so it frees them when it is destroyed */
  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3523 
3524 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3525 
/*
  MatCreateSubMatrix_MPIAIJ_SameRowDist - extracts the parallel submatrix mat[isrow, iscol] when the
  submatrix rows keep the same parallel row distribution as mat.

  Collective

  Input Parameters:
+ mat         - the MPIAIJ matrix to extract from
. isrow       - rows to keep (local on each process)
. iscol       - columns to keep (determines the column layout of *newmat)
. iscol_local - sequential IS with ALL requested column indices; the scan in step (2) requires it
                to be sorted (duplicates are allowed)
. call        - MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
- newmat      - on reuse, the matrix produced by a previous MAT_INITIAL_MATRIX call

  Output Parameter:
. newmat - the parallel submatrix

  Note:
  On MAT_INITIAL_MATRIX the sequential work matrix Msub and the index sets iscol_sub and iscmap
  (and iscol_local) are composed on *newmat ("SubMatrix", "SubIScol", "Subcmap", "ISAllGather")
  so a later MAT_REUSE_MATRIX call can refill the same structure without redoing the symbolic work.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
{
  PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
  PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             M, Msub, B = a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray, *colsub, Ncols;
  PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  IS              iscol_sub, iscmap;
  const PetscInt *is_idx, *cmap;
  PetscBool       allcolumns = PETSC_FALSE;
  MPI_Comm        comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve the work objects stashed on *newmat by a previous MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
    PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub, &count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
    PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
    PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");

    /* Refill the sequential submatrix with the current numerical values of mat */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol, &n));
    PetscCall(ISGetSize(iscol, &Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local, &flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    /* the shortcut applies only if every process requests all columns */
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      /* keep all columns: iscol_sub is iscol_local itself and the column map is the identity */
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx, *cmap1, k;
      PetscCall(PetscMalloc1(Ncols, &idx));
      PetscCall(PetscMalloc1(Ncols, &cmap1));
      PetscCall(ISGetIndices(iscol_local, &is_idx));
      count = 0; /* number of requested columns present in this process's diagonal or off-diagonal block */
      k     = 0; /* merge-scan position in garray (sorted global columns of the off-diagonal block) */
      for (i = 0; i < Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            /* both sequences are sorted, so k only ever advances */
            while (j > garray[k] && k < Bn - 1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local, &is_idx));

      /* iscol_sub: global columns kept on this process; ownership of idx transfers to the IS */
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
      PetscCall(ISGetBlockSize(iscol, &cbs));
      PetscCall(ISSetBlockSize(iscol_sub, cbs));

      /* iscmap: for each kept column, its column index in the submatrix; ownership of cmap1 transfers */
      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub, &count));
  aij = (Mat_SeqAIJ *)(Msub)->data;
  ii  = aij->i;
  PetscCall(ISGetIndices(iscmap, &cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub, &m, NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank, size;
    PetscInt    csize;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol, &csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        /* spread columns as evenly as possible; the first (Ncols % size) ranks get one extra */
        nlocal = Ncols / size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum of local column counts yields this rank's column ownership range [rstart, rend) */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);

    /* next, compute all the lengths */
    jj = aij->j;
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        /* cmap translates Msub's local column numbering into global submatrix columns */
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow, &bs));
    PetscCall(ISGetBlockSize(iscol, &cbs));

    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M, &i, NULL));
    PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(count, &colsub));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));

  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; /* translate to global submatrix columns */
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
    jj += nz;
    aa += nz;
  }
  /* NOTE(review): aa has been advanced past all rows by this point; presumably the restore only
     invalidates the pointer — confirm against MatSeqAIJRestoreArrayRead() semantics */
  PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
  PetscCall(ISRestoreIndices(iscmap, &cmap));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3735 
/*
    Not great since it makes two copies of the submatrix: first a local SeqAIJ
  copy, and then the end result produced by concatenating the local matrices.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().

  This requires a sequential iscol with all indices.
*/
/*
  MatCreateSubMatrix_MPIAIJ_nonscalable - extracts the parallel submatrix mat[isrow, iscol]

  Collective

  Input Parameters:
+ mat    - the MPIAIJ matrix
. isrow  - rows to keep (local on each process)
. iscol  - sequential IS listing ALL requested columns on every process (hence "nonscalable")
. csize  - number of local columns for the submatrix, or PETSC_DECIDE
. call   - MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
- newmat - on reuse, the matrix created by a previous MAT_INITIAL_MATRIX call

  Output Parameter:
. newmat - the parallel submatrix

  Note:
  On MAT_INITIAL_MATRIX the sequential work matrix Mreuse is composed on *newmat under the key
  "SubMatrix", so a MAT_REUSE_MATRIX call can refill the values without redoing the symbolic work.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* the shortcut applies only if every process requests all columns */
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve and refill the sequential submatrix stashed by a previous MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* spread columns as evenly as possible; the first (n % size) ranks get one extra */
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum of local column counts yields this rank's column ownership range [rstart, rend) */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row   = rstart + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj; /* column indices of this row */
    jj += nz;
    vwork = aa; /* values of this row */
    aa += nz;
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3869 
3870 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3871 {
3872   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3873   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
3874   const PetscInt *JJ;
3875   PetscBool       nooffprocentries;
3876   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3877 
3878   PetscFunctionBegin;
3879   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);
3880 
3881   PetscCall(PetscLayoutSetUp(B->rmap));
3882   PetscCall(PetscLayoutSetUp(B->cmap));
3883   m      = B->rmap->n;
3884   cstart = B->cmap->rstart;
3885   cend   = B->cmap->rend;
3886   rstart = B->rmap->rstart;
3887 
3888   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3889 
3890   if (PetscDefined(USE_DEBUG)) {
3891     for (i = 0; i < m; i++) {
3892       nnz = Ii[i + 1] - Ii[i];
3893       JJ  = J ? J + Ii[i] : NULL;
3894       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3895       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3896       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3897     }
3898   }
3899 
3900   for (i = 0; i < m; i++) {
3901     nnz     = Ii[i + 1] - Ii[i];
3902     JJ      = J ? J + Ii[i] : NULL;
3903     nnz_max = PetscMax(nnz_max, nnz);
3904     d       = 0;
3905     for (j = 0; j < nnz; j++) {
3906       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3907     }
3908     d_nnz[i] = d;
3909     o_nnz[i] = nnz - d;
3910   }
3911   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3912   PetscCall(PetscFree2(d_nnz, o_nnz));
3913 
3914   for (i = 0; i < m; i++) {
3915     ii = i + rstart;
3916     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J ? J + Ii[i] : NULL, v ? v + Ii[i] : NULL, INSERT_VALUES));
3917   }
3918   nooffprocentries    = B->nooffprocentries;
3919   B->nooffprocentries = PETSC_TRUE;
3920   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3921   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3922   B->nooffprocentries = nooffprocentries;
3923 
3924   /* count number of entries below block diagonal */
3925   PetscCall(PetscFree(Aij->ld));
3926   PetscCall(PetscCalloc1(m, &ld));
3927   Aij->ld = ld;
3928   for (i = 0; i < m; i++) {
3929     nnz = Ii[i + 1] - Ii[i];
3930     j   = 0;
3931     while (j < nnz && J[j] < cstart) j++;
3932     ld[i] = j;
3933     if (J) J += nnz;
3934   }
3935 
3936   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3937   PetscFunctionReturn(PETSC_SUCCESS);
3938 }
3939 
3940 /*@
3941   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3942   (the default parallel PETSc format).
3943 
3944   Collective
3945 
3946   Input Parameters:
3947 + B - the matrix
3948 . i - the indices into j for the start of each local row (starts with zero)
3949 . j - the column indices for each local row (starts with zero)
3950 - v - optional values in the matrix
3951 
3952   Level: developer
3953 
3954   Notes:
3955   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
3956   thus you CANNOT change the matrix entries by changing the values of `v` after you have
3957   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3958 
3959   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
3960 
3961   The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering, i.e., for the following matrix, the input data expected is
3963   as shown
3964 
3965 .vb
3966         1 0 0
3967         2 0 3     P0
3968        -------
3969         4 5 6     P1
3970 
3971      Process0 [P0] rows_owned=[0,1]
3972         i =  {0,1,3}  [size = nrow+1  = 2+1]
3973         j =  {0,0,2}  [size = 3]
3974         v =  {1,2,3}  [size = 3]
3975 
3976      Process1 [P1] rows_owned=[2]
3977         i =  {0,3}    [size = nrow+1  = 1+1]
3978         j =  {0,1,2}  [size = 3]
3979         v =  {4,5,6}  [size = 3]
3980 .ve
3981 
3982 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
3983           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
3984 @*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* dispatch to the implementation registered as "MatMPIAIJSetPreallocationCSR_C"
     (e.g. MatMPIAIJSetPreallocationCSR_MPIAIJ); a no-op for types that do not provide one */
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3991 
3992 /*@C
3993   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
3994   (the default parallel PETSc format).  For good matrix assembly performance
3995   the user should preallocate the matrix storage by setting the parameters
3996   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
3997 
3998   Collective
3999 
4000   Input Parameters:
4001 + B     - the matrix
4002 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4003            (same value is used for all local rows)
4004 . d_nnz - array containing the number of nonzeros in the various rows of the
4005            DIAGONAL portion of the local submatrix (possibly different for each row)
4006            or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4007            The size of this array is equal to the number of local rows, i.e 'm'.
4008            For matrices that will be factored, you must leave room for (and set)
4009            the diagonal entry even if it is zero.
4010 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4011            submatrix (same value is used for all local rows).
4012 - o_nnz - array containing the number of nonzeros in the various rows of the
4013            OFF-DIAGONAL portion of the local submatrix (possibly different for
4014            each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4015            structure. The size of this array is equal to the number
4016            of local rows, i.e 'm'.
4017 
4018   Example Usage:
4019   Consider the following 8x8 matrix with 34 non-zero values, that is
4020   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4021   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4022   as follows
4023 
4024 .vb
4025             1  2  0  |  0  3  0  |  0  4
4026     Proc0   0  5  6  |  7  0  0  |  8  0
4027             9  0 10  | 11  0  0  | 12  0
4028     -------------------------------------
4029            13  0 14  | 15 16 17  |  0  0
4030     Proc1   0 18  0  | 19 20 21  |  0  0
4031             0  0  0  | 22 23  0  | 24  0
4032     -------------------------------------
4033     Proc2  25 26 27  |  0  0 28  | 29  0
4034            30  0  0  | 31 32 33  |  0 34
4035 .ve
4036 
4037   This can be represented as a collection of submatrices as
4038 .vb
4039       A B C
4040       D E F
4041       G H I
4042 .ve
4043 
4044   Where the submatrices A,B,C are owned by proc0, D,E,F are
4045   owned by proc1, G,H,I are owned by proc2.
4046 
4047   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4048   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4049   The 'M','N' parameters are 8,8, and have the same values on all procs.
4050 
4051   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4052   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4053   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4054   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4055   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.
4057 
4058   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4059   allocated for every row of the local diagonal submatrix, and `o_nz`
4060   storage locations are allocated for every row of the OFF-DIAGONAL submat.
4061   One way to choose `d_nz` and `o_nz` is to use the max nonzerors per local
4062   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4063   In this case, the values of `d_nz`, `o_nz` are
4064 .vb
4065      proc0  dnz = 2, o_nz = 2
4066      proc1  dnz = 3, o_nz = 2
4067      proc2  dnz = 1, o_nz = 4
4068 .ve
4069   We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
4070   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4071   for proc3. i.e we are using 12+15+10=37 storage locations to store
4072   34 values.
4073 
4074   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4075   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4076   In the above case the values for `d_nnz`, `o_nnz` are
4077 .vb
4078      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4079      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4080      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4081 .ve
4082   Here the space allocated is sum of all the above values i.e 34, and
4083   hence pre-allocation is perfect.
4084 
4085   Level: intermediate
4086 
4087   Notes:
4088   If the *_nnz parameter is given then the *_nz parameter is ignored
4089 
4090   The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4091   storage.  The stored row and column indices begin with zero.
4092   See [Sparse Matrices](sec_matsparse) for details.
4093 
4094   The parallel matrix is partitioned such that the first m0 rows belong to
4095   process 0, the next m1 rows belong to process 1, the next m2 rows belong
4096   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4097 
4098   The DIAGONAL portion of the local submatrix of a processor can be defined
4099   as the submatrix which is obtained by extraction the part corresponding to
4100   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4101   first row that belongs to the processor, r2 is the last row belonging to
4102   the this processor, and c1-c2 is range of indices of the local part of a
4103   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4104   common case of a square matrix, the row and column ranges are the same and
4105   the DIAGONAL part is also square. The remaining portion of the local
4106   submatrix (mxN) constitute the OFF-DIAGONAL portion.
4107 
4108   If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4109 
4110   You can call `MatGetInfo()` to get information on how effective the preallocation was;
4111   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4112   You can also run with the option `-info` and look for messages with the string
4113   malloc in them to see if additional memory allocation was needed.
4114 
4115 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4116           `MatGetInfo()`, `PetscSplitOwnership()`
4117 @*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  /* validate B before dispatching, since the method lookup requires a typed Mat */
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* dispatch to the implementation registered as "MatMPIAIJSetPreallocation_C";
     a no-op for matrix types that do not provide one */
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4126 
4127 /*@
4128   MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard
4129   CSR format for the local rows.
4130 
4131   Collective
4132 
4133   Input Parameters:
4134 + comm - MPI communicator
4135 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
4136 . n    - This value should be the same as the local size used in creating the
4137        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4138        calculated if N is given) For square matrices n is almost always m.
4139 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4140 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4141 . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4142 . j    - column indices
4143 - a    - optional matrix values
4144 
4145   Output Parameter:
4146 . mat - the matrix
4147 
4148   Level: intermediate
4149 
4150   Notes:
4151   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4152   thus you CANNOT change the matrix entries by changing the values of a[] after you have
4153   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4154 
4155   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
4156 
4157   The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering, i.e., for the following matrix, the input data expected is
4159   as shown
4160 
  Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArrays()`
4162 .vb
4163         1 0 0
4164         2 0 3     P0
4165        -------
4166         4 5 6     P1
4167 
4168      Process0 [P0] rows_owned=[0,1]
4169         i =  {0,1,3}  [size = nrow+1  = 2+1]
4170         j =  {0,0,2}  [size = 3]
4171         v =  {1,2,3}  [size = 3]
4172 
4173      Process1 [P1] rows_owned=[2]
4174         i =  {0,3}    [size = nrow+1  = 1+1]
4175         j =  {0,1,2}  [size = 3]
4176         v =  {4,5,6}  [size = 3]
4177 .ve
4178 
.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4180           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4181 @*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  /* i may be NULL only when there is nothing to index; otherwise CSR row pointers must start at 0 */
  PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  /* copies the CSR data into the matrix; the caller's arrays are not retained */
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4194 
4195 /*@
4196   MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard
4197   CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed
4198   from `MatCreateMPIAIJWithArrays()`
4199 
4200   Deprecated: Use `MatUpdateMPIAIJWithArray()`
4201 
4202   Collective
4203 
4204   Input Parameters:
4205 + mat - the matrix
4206 . m   - number of local rows (Cannot be `PETSC_DECIDE`)
4207 . n   - This value should be the same as the local size used in creating the
4208        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4209        calculated if N is given) For square matrices n is almost always m.
4210 . M   - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4211 . N   - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4212 . Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4213 . J   - column indices
4214 - v   - matrix values
4215 
4216   Level: deprecated
4217 
4218 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4219           `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`
4220 @*/
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        nnz, i;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data; /* diagonal block (SeqAIJ) */
  PetscScalar    *ad, *ao;                          /* value arrays of the diagonal / off-diagonal blocks */
  PetscInt        ldi, Iii, md;
  const PetscInt *Adi = Ad->i;   /* row pointers of the diagonal block */
  PetscInt       *ld  = Aij->ld; /* per row: number of entries below (left of) the diagonal block */

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
  PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));

  /* each CSR row of v is laid out as [below-diagonal off-diag | diagonal block | above-diagonal off-diag];
     scatter those three pieces into the off-diagonal (ao) and diagonal (ad) value arrays */
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];   /* total entries in this row */
    Iii = Ii[i];               /* offset of this row within v */
    ldi = ld[i];               /* entries to the left of the diagonal block */
    md  = Adi[i + 1] - Adi[i]; /* entries inside the diagonal block */
    PetscCall(PetscArraycpy(ao, v + Iii, ldi));
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
    ad += md;
    ao += nnz - md;
  }
  /* only locally-owned values changed, so assembly can skip off-process communication */
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  /* bump object states so cached derived data (norms, factors, ...) is invalidated */
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(PETSC_SUCCESS);
}
4264 
4265 /*@
4266   MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values
4267 
4268   Collective
4269 
4270   Input Parameters:
4271 + mat - the matrix
4272 - v   - matrix values, stored by row
4273 
4274   Level: intermediate
4275 
4276   Note:
4277   The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4278 
4279 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4280           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4281 @*/
4282 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4283 {
4284   PetscInt        nnz, i, m;
4285   PetscBool       nooffprocentries;
4286   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4287   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4288   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4289   PetscScalar    *ad, *ao;
4290   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4291   PetscInt        ldi, Iii, md;
4292   PetscInt       *ld = Aij->ld;
4293 
4294   PetscFunctionBegin;
4295   m = mat->rmap->n;
4296 
4297   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4298   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4299   Iii = 0;
4300   for (i = 0; i < m; i++) {
4301     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4302     ldi = ld[i];
4303     md  = Adi[i + 1] - Adi[i];
4304     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4305     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4306     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4307     ad += md;
4308     ao += nnz - md;
4309     Iii += nnz;
4310   }
4311   nooffprocentries      = mat->nooffprocentries;
4312   mat->nooffprocentries = PETSC_TRUE;
4313   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4314   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4315   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4316   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4317   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4318   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4319   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4320   mat->nooffprocentries = nooffprocentries;
4321   PetscFunctionReturn(PETSC_SUCCESS);
4322 }
4323 
4324 /*@C
4325   MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4326   (the default parallel PETSc format).  For good matrix assembly performance
4327   the user should preallocate the matrix storage by setting the parameters
4328   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4329 
4330   Collective
4331 
4332   Input Parameters:
4333 + comm  - MPI communicator
4334 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4335            This value should be the same as the local size used in creating the
4336            y vector for the matrix-vector product y = Ax.
4337 . n     - This value should be the same as the local size used in creating the
4338        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4339        calculated if N is given) For square matrices n is almost always m.
4340 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4341 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4342 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4343            (same value is used for all local rows)
4344 . d_nnz - array containing the number of nonzeros in the various rows of the
4345            DIAGONAL portion of the local submatrix (possibly different for each row)
4346            or `NULL`, if `d_nz` is used to specify the nonzero structure.
4347            The size of this array is equal to the number of local rows, i.e 'm'.
4348 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4349            submatrix (same value is used for all local rows).
4350 - o_nnz - array containing the number of nonzeros in the various rows of the
4351            OFF-DIAGONAL portion of the local submatrix (possibly different for
4352            each row) or `NULL`, if `o_nz` is used to specify the nonzero
4353            structure. The size of this array is equal to the number
4354            of local rows, i.e 'm'.
4355 
4356   Output Parameter:
4357 . A - the matrix
4358 
4359   Options Database Keys:
4360 + -mat_no_inode                     - Do not use inodes
4361 . -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
4362 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4363         See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
4364         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4365 
4366   Level: intermediate
4367 
4368   Notes:
4369   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4370   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4371   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4372 
4373   If the *_nnz parameter is given then the *_nz parameter is ignored
4374 
4375   The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4376   processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4377   storage requirements for this matrix.
4378 
4379   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
4380   processor then it must be used on all processors that share the object for
4381   that argument.
4382 
4383   The user MUST specify either the local or global matrix dimensions
4384   (possibly both).
4385 
4386   The parallel matrix is partitioned across processors such that the
4387   first m0 rows belong to process 0, the next m1 rows belong to
4388   process 1, the next m2 rows belong to process 2 etc.. where
4389   m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4390   values corresponding to [m x N] submatrix.
4391 
4392   The columns are logically partitioned with the n0 columns belonging
4393   to 0th partition, the next n1 columns belonging to the next
4394   partition etc.. where n0,n1,n2... are the input parameter 'n'.
4395 
4396   The DIAGONAL portion of the local submatrix on any given processor
4397   is the submatrix corresponding to the rows and columns m,n
4398   corresponding to the given processor. i.e diagonal matrix on
4399   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4400   etc. The remaining portion of the local submatrix [m x (N-n)]
4401   constitute the OFF-DIAGONAL portion. The example below better
4402   illustrates this concept.
4403 
4404   For a square global matrix we define each processor's diagonal portion
4405   to be its local rows and the corresponding columns (a square submatrix);
4406   each processor's off-diagonal portion encompasses the remainder of the
4407   local matrix (a rectangular submatrix).
4408 
4409   If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
4410 
4411   When calling this routine with a single process communicator, a matrix of
4412   type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4413   type of communicator, use the construction mechanism
4414 .vb
4415   MatCreate(..., &A);
4416   MatSetType(A, MATMPIAIJ);
4417   MatSetSizes(A, m, n, M, N);
4418   MatMPIAIJSetPreallocation(A, ...);
4419 .ve
4420 
4421   By default, this format uses inodes (identical nodes) when possible.
4422   We search for consecutive rows with the same nonzero structure, thereby
4423   reusing matrix information to achieve increased efficiency.
4424 
4425   Example Usage:
4426   Consider the following 8x8 matrix with 34 non-zero values, that is
4427   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4428   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4429   as follows
4430 
4431 .vb
4432             1  2  0  |  0  3  0  |  0  4
4433     Proc0   0  5  6  |  7  0  0  |  8  0
4434             9  0 10  | 11  0  0  | 12  0
4435     -------------------------------------
4436            13  0 14  | 15 16 17  |  0  0
4437     Proc1   0 18  0  | 19 20 21  |  0  0
4438             0  0  0  | 22 23  0  | 24  0
4439     -------------------------------------
4440     Proc2  25 26 27  |  0  0 28  | 29  0
4441            30  0  0  | 31 32 33  |  0 34
4442 .ve
4443 
4444   This can be represented as a collection of submatrices as
4445 
4446 .vb
4447       A B C
4448       D E F
4449       G H I
4450 .ve
4451 
4452   Where the submatrices A,B,C are owned by proc0, D,E,F are
4453   owned by proc1, G,H,I are owned by proc2.
4454 
4455   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4456   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4457   The 'M','N' parameters are 8,8, and have the same values on all procs.
4458 
4459   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4460   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4461   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4462   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4463   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4464   matrix, and [DF] as another SeqAIJ matrix.
4465 
4466   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4467   allocated for every row of the local diagonal submatrix, and `o_nz`
4468   storage locations are allocated for every row of the OFF-DIAGONAL submat.
4469   One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
4470   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4471   In this case, the values of `d_nz`,`o_nz` are
4472 .vb
4473      proc0  dnz = 2, o_nz = 2
4474      proc1  dnz = 3, o_nz = 2
4475      proc2  dnz = 1, o_nz = 4
4476 .ve
4477   We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
4478   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4479   for proc2. i.e we are using 12+15+10=37 storage locations to store
4480   34 values.
4481 
4482   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4483   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4484   In the above case the values for d_nnz,o_nnz are
4485 .vb
4486      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4487      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4488      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4489 .ve
4490   Here the space allocated is sum of all the above values i.e 34, and
4491   hence pre-allocation is perfect.
4492 
4493 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4494           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
4495 @*/
4496 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4497 {
4498   PetscMPIInt size;
4499 
4500   PetscFunctionBegin;
4501   PetscCall(MatCreate(comm, A));
4502   PetscCall(MatSetSizes(*A, m, n, M, N));
4503   PetscCallMPI(MPI_Comm_size(comm, &size));
4504   if (size > 1) {
4505     PetscCall(MatSetType(*A, MATMPIAIJ));
4506     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4507   } else {
4508     PetscCall(MatSetType(*A, MATSEQAIJ));
4509     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4510   }
4511   PetscFunctionReturn(PETSC_SUCCESS);
4512 }
4513 
4514 /*MC
4515     MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix
4516 
4517     Synopsis:
4518     MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4519 
4520     Not Collective
4521 
4522     Input Parameter:
4523 .   A - the `MATMPIAIJ` matrix
4524 
4525     Output Parameters:
4526 +   Ad - the diagonal portion of the matrix
4527 .   Ao - the off-diagonal portion of the matrix
4528 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4529 -   ierr - error code
4530 
4531      Level: advanced
4532 
4533     Note:
4534     Use  `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4535 
4536 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
4537 M*/
4538 
4539 /*MC
4540     MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4541 
4542     Synopsis:
4543     MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4544 
4545     Not Collective
4546 
4547     Input Parameters:
4548 +   A - the `MATMPIAIJ` matrix
4549 .   Ad - the diagonal portion of the matrix
4550 .   Ao - the off-diagonal portion of the matrix
4551 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4552 -   ierr - error code
4553 
4554      Level: advanced
4555 
4556 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
4557 M*/
4558 
4559 /*@C
4560   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4561 
4562   Not Collective
4563 
4564   Input Parameter:
4565 . A - The `MATMPIAIJ` matrix
4566 
4567   Output Parameters:
4568 + Ad     - The local diagonal block as a `MATSEQAIJ` matrix
4569 . Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
4570 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4571 
4572   Level: intermediate
4573 
4574   Note:
4575   The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
4576   in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns in `Ao` are in [0, Nco), where Nco is
4577   the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
4578   local column numbers to global column numbers in the original matrix.
4579 
4580   Fortran Notes:
4581   `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()`
4582 
4583 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
4584 @*/
4585 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4586 {
4587   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4588   PetscBool   flg;
4589 
4590   PetscFunctionBegin;
4591   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4592   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4593   if (Ad) *Ad = a->A;
4594   if (Ao) *Ao = a->B;
4595   if (colmap) *colmap = a->garray;
4596   PetscFunctionReturn(PETSC_SUCCESS);
4597 }
4598 
/*
  MatCreateMPIMatConcatenateSeqMat_MPIAIJ - stacks the sequential matrices `inmat` from all
  ranks of `comm`, by rows, into one parallel matrix `*outmat`.

  Input:
    comm  - the communicator over which the result is distributed
    inmat - this rank's sequential matrix (contributes mat->rmap rows of the result)
    n     - number of local columns of the result, or PETSC_DECIDE
    scall - MAT_INITIAL_MATRIX to create *outmat, MAT_REUSE_MATRIX to refill its values

  Collective; the symbolic phase contains MPI reductions, so `scall` must agree on all ranks.
*/
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  PetscInt     m, N, i, rstart, nnz, Ii;
  PetscInt    *indx;
  PetscScalar *values;
  MatType      rootType;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz, *onz, sum, bs, cbs;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    /* exclusive prefix sum of the local row counts gives this rank's first global row */
    PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
    rstart -= m;

    /* count diagonal/off-diagonal nonzeros per row for preallocation */
    MatPreallocateBegin(comm, m, n, dnz, onz);
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
      PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
    }

    PetscCall(MatCreate(comm, outmat));
    PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
    PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
    /* result takes inmat's root type (e.g. plain vs device AIJ); with a 1-rank comm the
       result is sequential, so both preallocations are attempted — only the one matching
       the actual type has any effect, the other is a harmless no-op */
    PetscCall(MatGetRootType_Private(inmat, &rootType));
    PetscCall(MatSetType(*outmat, rootType));
    PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
    MatPreallocateEnd(dnz, onz);
    /* every rank inserts only rows it owns, so assembly needs no communication */
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase */
  PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
    Ii = i + rstart;
    PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
  }
  PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4650 
4651 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4652 {
4653   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4654 
4655   PetscFunctionBegin;
4656   if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
4657   PetscCall(PetscFree(merge->id_r));
4658   PetscCall(PetscFree(merge->len_s));
4659   PetscCall(PetscFree(merge->len_r));
4660   PetscCall(PetscFree(merge->bi));
4661   PetscCall(PetscFree(merge->bj));
4662   PetscCall(PetscFree(merge->buf_ri[0]));
4663   PetscCall(PetscFree(merge->buf_ri));
4664   PetscCall(PetscFree(merge->buf_rj[0]));
4665   PetscCall(PetscFree(merge->buf_rj));
4666   PetscCall(PetscFree(merge->coi));
4667   PetscCall(PetscFree(merge->coj));
4668   PetscCall(PetscFree(merge->owners_co));
4669   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4670   PetscCall(PetscFree(merge));
4671   PetscFunctionReturn(PETSC_SUCCESS);
4672 }
4673 
4674 #include <../src/mat/utils/freespace.h>
4675 #include <petscbt.h>
4676 
/*
  MatCreateMPIAIJSumSeqAIJNumeric - numeric phase of summing per-rank SeqAIJ matrices into
  one MPIAIJ matrix whose structure was built by MatCreateMPIAIJSumSeqAIJSymbolic().

  Input:
    seqmat - this rank's sequential matrix; rows owned by other ranks are shipped to them
    mpimat - the parallel matrix from the symbolic phase; must carry the "MatMergeSeqsToMPI"
             container holding the merged ij structure and message bookkeeping

  Values for rows this rank owns are accumulated from the local seqmat and from the value
  messages received from every other contributing rank, then inserted with INSERT_VALUES.
*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
{
  MPI_Comm             comm;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, taga, *len_s;
  PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj;
  PetscInt             proc, m;
  PetscInt           **buf_ri, **buf_rj;
  PetscInt             k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
  PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
  MPI_Request         *s_waits, *r_waits;
  MPI_Status          *status;
  const MatScalar     *aa, *a_a;
  MatScalar          **abuf_r, *ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));

  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* retrieve the merge data (row layout, message lengths, received ij structure) left by the symbolic phase */
  PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container, (void **)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
  aa = a_a;

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  PetscCall(PetscMalloc1(size, &status));
  owners = merge->rowmap->range; /* global row ownership ranges */
  len_s  = merge->len_s;         /* # of values to send to each rank */

  /* send and recv matrix values */
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
  PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));

  PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    /* rows destined for proc are contiguous in seqmat's CSR value array, so one send suffices */
    i = owners[proc];
    PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  PetscCall(PetscMalloc1(N, &ba_i)); /* scratch for one merged row; N = global #cols bounds any row length */
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  /* each received i-structure is [nrows, row indices..., i-offsets...]; set up cursors into it */
  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i = 0; i < m; i++) {
    arow = owners[rank] + i; /* global row index */
    bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
    bnzi = bi[i + 1] - bi[i];
    PetscCall(PetscArrayzero(ba_i, bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow + 1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* aj's columns are a subset of the (sorted) merged row bj_i, so a single sweep matches them all */
    for (j = 0; nextaj < anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k] + 1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j = 0; nextaj < anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++;
        nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
  PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));

  /* abuf_r[0] owns the single receive buffer allocated by PetscPostIrecvScalar */
  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4795 
4796 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4797 {
4798   Mat                  B_mpi;
4799   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4800   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4801   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4802   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4803   PetscInt             len, proc, *dnz, *onz, bs, cbs;
4804   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4805   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4806   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4807   MPI_Status          *status;
4808   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4809   PetscBT              lnkbt;
4810   Mat_Merge_SeqsToMPI *merge;
4811   PetscContainer       container;
4812 
4813   PetscFunctionBegin;
4814   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4815 
4816   /* make sure it is a PETSc comm */
4817   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4818   PetscCallMPI(MPI_Comm_size(comm, &size));
4819   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4820 
4821   PetscCall(PetscNew(&merge));
4822   PetscCall(PetscMalloc1(size, &status));
4823 
4824   /* determine row ownership */
4825   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4826   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4827   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4828   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4829   PetscCall(PetscLayoutSetUp(merge->rowmap));
4830   PetscCall(PetscMalloc1(size, &len_si));
4831   PetscCall(PetscMalloc1(size, &merge->len_s));
4832 
4833   m      = merge->rowmap->n;
4834   owners = merge->rowmap->range;
4835 
4836   /* determine the number of messages to send, their lengths */
4837   len_s = merge->len_s;
4838 
4839   len          = 0; /* length of buf_si[] */
4840   merge->nsend = 0;
4841   for (proc = 0; proc < size; proc++) {
4842     len_si[proc] = 0;
4843     if (proc == rank) {
4844       len_s[proc] = 0;
4845     } else {
4846       len_si[proc] = owners[proc + 1] - owners[proc] + 1;
4847       len_s[proc]  = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4848     }
4849     if (len_s[proc]) {
4850       merge->nsend++;
4851       nrows = 0;
4852       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4853         if (ai[i + 1] > ai[i]) nrows++;
4854       }
4855       len_si[proc] = 2 * (nrows + 1);
4856       len += len_si[proc];
4857     }
4858   }
4859 
4860   /* determine the number and length of messages to receive for ij-structure */
4861   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4862   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4863 
4864   /* post the Irecv of j-structure */
4865   PetscCall(PetscCommGetNewTag(comm, &tagj));
4866   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4867 
4868   /* post the Isend of j-structure */
4869   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4870 
4871   for (proc = 0, k = 0; proc < size; proc++) {
4872     if (!len_s[proc]) continue;
4873     i = owners[proc];
4874     PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4875     k++;
4876   }
4877 
4878   /* receives and sends of j-structure are complete */
4879   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4880   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4881 
4882   /* send and recv i-structure */
4883   PetscCall(PetscCommGetNewTag(comm, &tagi));
4884   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4885 
4886   PetscCall(PetscMalloc1(len + 1, &buf_s));
4887   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4888   for (proc = 0, k = 0; proc < size; proc++) {
4889     if (!len_s[proc]) continue;
4890     /* form outgoing message for i-structure:
4891          buf_si[0]:                 nrows to be sent
4892                [1:nrows]:           row index (global)
4893                [nrows+1:2*nrows+1]: i-structure index
4894     */
4895     nrows       = len_si[proc] / 2 - 1;
4896     buf_si_i    = buf_si + nrows + 1;
4897     buf_si[0]   = nrows;
4898     buf_si_i[0] = 0;
4899     nrows       = 0;
4900     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4901       anzi = ai[i + 1] - ai[i];
4902       if (anzi) {
4903         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4904         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4905         nrows++;
4906       }
4907     }
4908     PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4909     k++;
4910     buf_si += len_si[proc];
4911   }
4912 
4913   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4914   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4915 
4916   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4917   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4918 
4919   PetscCall(PetscFree(len_si));
4920   PetscCall(PetscFree(len_ri));
4921   PetscCall(PetscFree(rj_waits));
4922   PetscCall(PetscFree2(si_waits, sj_waits));
4923   PetscCall(PetscFree(ri_waits));
4924   PetscCall(PetscFree(buf_s));
4925   PetscCall(PetscFree(status));
4926 
4927   /* compute a local seq matrix in each processor */
4928   /* allocate bi array and free space for accumulating nonzero column info */
4929   PetscCall(PetscMalloc1(m + 1, &bi));
4930   bi[0] = 0;
4931 
4932   /* create and initialize a linked list */
4933   nlnk = N + 1;
4934   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
4935 
4936   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4937   len = ai[owners[rank + 1]] - ai[owners[rank]];
4938   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
4939 
4940   current_space = free_space;
4941 
4942   /* determine symbolic info for each local row */
4943   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4944 
4945   for (k = 0; k < merge->nrecv; k++) {
4946     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4947     nrows       = *buf_ri_k[k];
4948     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4949     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4950   }
4951 
4952   MatPreallocateBegin(comm, m, n, dnz, onz);
4953   len = 0;
4954   for (i = 0; i < m; i++) {
4955     bnzi = 0;
4956     /* add local non-zero cols of this proc's seqmat into lnk */
4957     arow = owners[rank] + i;
4958     anzi = ai[arow + 1] - ai[arow];
4959     aj   = a->j + ai[arow];
4960     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
4961     bnzi += nlnk;
4962     /* add received col data into lnk */
4963     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4964       if (i == *nextrow[k]) {            /* i-th row */
4965         anzi = *(nextai[k] + 1) - *nextai[k];
4966         aj   = buf_rj[k] + *nextai[k];
4967         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
4968         bnzi += nlnk;
4969         nextrow[k]++;
4970         nextai[k]++;
4971       }
4972     }
4973     if (len < bnzi) len = bnzi; /* =max(bnzi) */
4974 
4975     /* if free space is not available, make more free space */
4976     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
4977     /* copy data into free space, then initialize lnk */
4978     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
4979     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
4980 
4981     current_space->array += bnzi;
4982     current_space->local_used += bnzi;
4983     current_space->local_remaining -= bnzi;
4984 
4985     bi[i + 1] = bi[i] + bnzi;
4986   }
4987 
4988   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
4989 
4990   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
4991   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
4992   PetscCall(PetscLLDestroy(lnk, lnkbt));
4993 
4994   /* create symbolic parallel matrix B_mpi */
4995   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
4996   PetscCall(MatCreate(comm, &B_mpi));
4997   if (n == PETSC_DECIDE) {
4998     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
4999   } else {
5000     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
5001   }
5002   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
5003   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
5004   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5005   MatPreallocateEnd(dnz, onz);
5006   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
5007 
5008   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5009   B_mpi->assembled = PETSC_FALSE;
5010   merge->bi        = bi;
5011   merge->bj        = bj;
5012   merge->buf_ri    = buf_ri;
5013   merge->buf_rj    = buf_rj;
5014   merge->coi       = NULL;
5015   merge->coj       = NULL;
5016   merge->owners_co = NULL;
5017 
5018   PetscCall(PetscCommDestroy(&comm));
5019 
5020   /* attach the supporting struct to B_mpi for reuse */
5021   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5022   PetscCall(PetscContainerSetPointer(container, merge));
5023   PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5024   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5025   PetscCall(PetscContainerDestroy(&container));
5026   *mpimat = B_mpi;
5027 
5028   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5029   PetscFunctionReturn(PETSC_SUCCESS);
5030 }
5031 
5032 /*@C
5033   MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5034   matrices from each processor
5035 
5036   Collective
5037 
5038   Input Parameters:
+ comm   - the communicator the parallel matrix will live on
. seqmat - the input sequential matrix
5041 . m      - number of local rows (or `PETSC_DECIDE`)
5042 . n      - number of local columns (or `PETSC_DECIDE`)
5043 - scall  - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5044 
5045   Output Parameter:
5046 . mpimat - the parallel matrix generated
5047 
5048   Level: advanced
5049 
5050   Note:
5051   The dimensions of the sequential matrix in each processor MUST be the same.
5052   The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5053   destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat.
5054 
5055 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
5056 @*/
5057 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5058 {
5059   PetscMPIInt size;
5060 
5061   PetscFunctionBegin;
5062   PetscCallMPI(MPI_Comm_size(comm, &size));
5063   if (size == 1) {
5064     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5065     if (scall == MAT_INITIAL_MATRIX) {
5066       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5067     } else {
5068       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5069     }
5070     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5071     PetscFunctionReturn(PETSC_SUCCESS);
5072   }
5073   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5074   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5075   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5076   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5077   PetscFunctionReturn(PETSC_SUCCESS);
5078 }
5079 
5080 /*@
5081   MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix.
5082 
5083   Not Collective
5084 
5085   Input Parameter:
5086 . A - the matrix
5087 
5088   Output Parameter:
5089 . A_loc - the local sequential matrix generated
5090 
5091   Level: developer
5092 
5093   Notes:
5094   The matrix is created by taking `A`'s local rows and putting them into a sequential matrix
5095   with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and
5096   `n` is the global column count obtained with `MatGetSize()`
5097 
5098   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5099 
5100   For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.
5101 
5102   Destroy the matrix with `MatDestroy()`
5103 
5104 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5105 @*/
5106 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5107 {
5108   PetscBool mpi;
5109 
5110   PetscFunctionBegin;
5111   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5112   if (mpi) {
5113     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5114   } else {
5115     *A_loc = A;
5116     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5117   }
5118   PetscFunctionReturn(PETSC_SUCCESS);
5119 }
5120 
5121 /*@
5122   MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.
5123 
5124   Not Collective
5125 
5126   Input Parameters:
5127 + A     - the matrix
5128 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5129 
5130   Output Parameter:
5131 . A_loc - the local sequential matrix generated
5132 
5133   Level: developer
5134 
5135   Notes:
5136   The matrix is created by taking all `A`'s local rows and putting them into a sequential
  matrix with `mlocal` rows and `n` columns. `mlocal` is the row count obtained with
5138   `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.
5139 
5140   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5141 
5142   When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
5143   with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
5144   then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
5145   and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.
5146 
5147 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5148 @*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray; /* garray: local off-diag column id -> global column id */
  const PetscScalar *aa, *ba, *aav, *bav;
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  /* PetscStrbeginswith (not TypeCompare) so derived types such as mpiaijcusparse are accepted too */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* one rank: the diagonal block already holds the entire matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  a  = (Mat_SeqAIJ *)(mpimat->A)->data;
  b  = (Mat_SeqAIJ *)(mpimat->B)->data;
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  /* aa/ba (and aj/bj above) are advancing cursors; aav/bav stay pristine for the Restore calls */
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* ci: row offsets of the merged matrix = diag nnz + off-diag nnz per row */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal entries whose global column is left of the diagonal block (keeps cj sorted) */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A; local indices shifted to global by cstart */
      for (j = 0; j < ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* remaining off-diagonal entries (global column >= cstart + diag width) */
      for (j = jo; j < ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* pattern is fixed: only the values need to be re-merged, in the same interleaving order as above */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal entries left of the diagonal block */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* remaining off-diagonal entries */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5253 
5254 /*@
  MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
  mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and off-diagonal parts
5257 
5258   Not Collective
5259 
5260   Input Parameters:
5261 + A     - the matrix
5262 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5263 
5264   Output Parameters:
5265 + glob  - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5266 - A_loc - the local sequential matrix generated
5267 
5268   Level: developer
5269 
5270   Note:
5271   This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal
5272   part, then those associated with the off-diagonal part (in its local ordering)
5273 
5274 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5275 @*/
5276 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5277 {
5278   Mat             Ao, Ad;
5279   const PetscInt *cmap;
5280   PetscMPIInt     size;
5281   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5282 
5283   PetscFunctionBegin;
5284   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5285   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5286   if (size == 1) {
5287     if (scall == MAT_INITIAL_MATRIX) {
5288       PetscCall(PetscObjectReference((PetscObject)Ad));
5289       *A_loc = Ad;
5290     } else if (scall == MAT_REUSE_MATRIX) {
5291       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5292     }
5293     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5294     PetscFunctionReturn(PETSC_SUCCESS);
5295   }
5296   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5297   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5298   if (f) {
5299     PetscCall((*f)(A, scall, glob, A_loc));
5300   } else {
5301     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5302     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5303     Mat_SeqAIJ        *c;
5304     PetscInt          *ai = a->i, *aj = a->j;
5305     PetscInt          *bi = b->i, *bj = b->j;
5306     PetscInt          *ci, *cj;
5307     const PetscScalar *aa, *ba;
5308     PetscScalar       *ca;
5309     PetscInt           i, j, am, dn, on;
5310 
5311     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5312     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5313     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5314     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5315     if (scall == MAT_INITIAL_MATRIX) {
5316       PetscInt k;
5317       PetscCall(PetscMalloc1(1 + am, &ci));
5318       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5319       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5320       ci[0] = 0;
5321       for (i = 0, k = 0; i < am; i++) {
5322         const PetscInt ncols_o = bi[i + 1] - bi[i];
5323         const PetscInt ncols_d = ai[i + 1] - ai[i];
5324         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5325         /* diagonal portion of A */
5326         for (j = 0; j < ncols_d; j++, k++) {
5327           cj[k] = *aj++;
5328           ca[k] = *aa++;
5329         }
5330         /* off-diagonal portion of A */
5331         for (j = 0; j < ncols_o; j++, k++) {
5332           cj[k] = dn + *bj++;
5333           ca[k] = *ba++;
5334         }
5335       }
5336       /* put together the new matrix */
5337       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5338       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5339       /* Since these are PETSc arrays, change flags to free them as necessary. */
5340       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5341       c->free_a  = PETSC_TRUE;
5342       c->free_ij = PETSC_TRUE;
5343       c->nonew   = 0;
5344       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5345     } else if (scall == MAT_REUSE_MATRIX) {
5346       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5347       for (i = 0; i < am; i++) {
5348         const PetscInt ncols_d = ai[i + 1] - ai[i];
5349         const PetscInt ncols_o = bi[i + 1] - bi[i];
5350         /* diagonal portion of A */
5351         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5352         /* off-diagonal portion of A */
5353         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5354       }
5355       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5356     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5357     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5358     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa));
5359     if (glob) {
5360       PetscInt cst, *gidx;
5361 
5362       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5363       PetscCall(PetscMalloc1(dn + on, &gidx));
5364       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5365       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5366       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5367     }
5368   }
5369   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5370   PetscFunctionReturn(PETSC_SUCCESS);
5371 }
5372 
5373 /*@C
5374   MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5375 
5376   Not Collective
5377 
5378   Input Parameters:
5379 + A     - the matrix
5380 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5381 . row   - index set of rows to extract (or `NULL`)
5382 - col   - index set of columns to extract (or `NULL`)
5383 
5384   Output Parameter:
5385 . A_loc - the local sequential matrix generated
5386 
5387   Level: developer
5388 
5389 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5390 @*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS          isrowa, iscola;
  Mat        *aloc;
  PetscBool   match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  if (!row) {
    /* default row set: all locally owned rows */
    start = A->rmap->rstart;
    end   = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* default column set: owned columns plus the off-diagonal (garray) columns, merged in
       ascending global order (garray is sorted, so a single split point imark suffices) */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    /* off-diagonal columns left of the owned range */
    for (i = 0; i < nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    /* the owned (diagonal-block) columns */
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
    /* off-diagonal columns right of the owned range */
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices expects an array of matrices on reuse; wrap the existing one */
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  /* only destroy the index sets we created ourselves */
  if (!row) PetscCall(ISDestroy(&isrowa));
  if (!col) PetscCall(ISDestroy(&iscola));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5443 
/*
 * Create a sequential AIJ matrix based on row indices. A whole column is extracted once a row is matched.
 * Rows could be local or remote. The routine is designed to be scalable in memory so that nothing is based
 * on a global size.
 * */
static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
{
  Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth;
  PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt            owner;
  PetscSFNode           *iremote, *oiremote;
  const PetscInt        *lrowindices;
  PetscSF                sf, osf;
  PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar     *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  /* per owned row, interleaved (diag, off-diag) counts and their prefix sums */
  for (i = 0; i < plocalsize; i++) {
    /* diagonal */
    nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
    /* off-diagonal */
    nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* prefix sums give the relative location of each row's entries inside pd->a / po->a */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  /* total diag/off-diag entry counts and the maximum row width (used as ncols of P_oth) */
  for (i = 0; i < nrows; i++) {
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol    = PetscMax(pnnz[i], ncol);
    /* diagonal */
    dntotalcols += nlcols[i * 2 + 0];
    /* off-diagonal */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* diagonal */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diagonal */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  /* build entry-wise SF graphs: each nonzero of P_oth is a leaf rooted at the owner's pd/po data */
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off-diagonal */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++]      = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* off-diagonal */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* operate on the matrix internal data to save memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for diag matrix; pd->j is shifted in place and shifted back below */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  /* po->j is likewise globalized in place and mapped back to local after the broadcast */
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5617 
/*
 * Creates a SeqAIJ matrix by taking the rows of B that correspond to nonzero columns of local A.
 * This supports MPIAIJ and MAIJ.
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp;
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   *  and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys; for MAIJ (dof > 1) several consecutive
       garray entries collapse to the same key garray[i]/dof */
    PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense:
       a repeated key can then only be the one inserted on the previous iteration */
    for (i = 0; i < a->B->cmap->n; i++) {
      key = a->garray[i] / dof;
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same value the previous step */
        mapping[i] = count - 1;
      }
    }
    /* map: A's off-diagonal column -> row of P_oth */
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
    PetscCall(PetscHMapIGetSize(hamp, &htsize));
    PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
    PetscCall(PetscCalloc1(htsize, &rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    /* hash-map key order is arbitrary; sort so the extracted rows are in ascending order */
    PetscCall(PetscSortInt(htsize, rowindices));
    PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
    /* In case, the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that are attached to the matrix earlier.
     */
    const PetscScalar *pd_a, *po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
    PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
    /* Update values in place */
    PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5694 
5695 /*@C
  MatGetBrowsOfAcols - Returns an `IS` that contains the rows of `B` that equal the nonzero columns of local `A`
5697 
5698   Collective
5699 
5700   Input Parameters:
5701 + A     - the first matrix in `MATMPIAIJ` format
5702 . B     - the second matrix in `MATMPIAIJ` format
5703 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5704 
5705   Output Parameters:
5706 + rowb  - On input index sets of rows of B to extract (or `NULL`), modified on output
5707 . colb  - On input index sets of columns of B to extract (or `NULL`), modified on output
5708 - B_seq - the sequential matrix generated
5709 
5710   Level: developer
5711 
5712 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
5713 @*/
5714 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5715 {
5716   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5717   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5718   IS          isrowb, iscolb;
5719   Mat        *bseq = NULL;
5720 
5721   PetscFunctionBegin;
5722   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5723              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5724   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5725 
5726   if (scall == MAT_INITIAL_MATRIX) {
5727     start = A->cmap->rstart;
5728     cmap  = a->garray;
5729     nzA   = a->A->cmap->n;
5730     nzB   = a->B->cmap->n;
5731     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5732     ncols = 0;
5733     for (i = 0; i < nzB; i++) { /* row < local row index */
5734       if (cmap[i] < start) idx[ncols++] = cmap[i];
5735       else break;
5736     }
5737     imark = i;
5738     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5739     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5740     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5741     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5742   } else {
5743     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5744     isrowb = *rowb;
5745     iscolb = *colb;
5746     PetscCall(PetscMalloc1(1, &bseq));
5747     bseq[0] = *B_seq;
5748   }
5749   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5750   *B_seq = bseq[0];
5751   PetscCall(PetscFree(bseq));
5752   if (!rowb) {
5753     PetscCall(ISDestroy(&isrowb));
5754   } else {
5755     *rowb = isrowb;
5756   }
5757   if (!colb) {
5758     PetscCall(ISDestroy(&iscolb));
5759   } else {
5760     *colb = iscolb;
5761   }
5762   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5763   PetscFunctionReturn(PETSC_SUCCESS);
5764 }
5765 
5766 /*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking the rows of B that correspond to the nonzero columns
5768     of the OFF-DIAGONAL portion of local A
5769 
5770     Collective
5771 
5772    Input Parameters:
5773 +    A,B - the matrices in `MATMPIAIJ` format
5774 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5775 
5776    Output Parameter:
5777 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5778 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5779 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5780 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5781 
5782     Developer Note:
5783     This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable.
5785 
5786     Level: developer
5787 
5788 */
PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
{
  Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *b_oth;
  VecScatter         ctx;
  MPI_Comm           comm;
  const PetscMPIInt *rprocs, *sprocs;
  const PetscInt    *srow, *rstarts, *sstarts;
  PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
  PetscInt           i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len;
  PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
  MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
  PetscMPIInt        size, tag, rank, nreqs;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* A's column layout must line up with B's row layout, rank by rank */
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* Uniprocessor case: A has no off-diagonal part, so there are no remote rows of B to fetch */
  if (size == 1) {
    /* NOTE(review): these two assignments write the local parameter copies, not
       *startsj_s / *bufa_ptr, so the caller's pointers are left untouched here
       (and startsj_r is not touched at all) -- confirm this is intended */
    startsj_s = NULL;
    bufa_ptr  = NULL;
    *B_oth    = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* Reuse the communication pattern (who sends/receives which rows) of the
     matrix-vector scatter of A; see the Developer Note above this function */
  ctx = a->Mvctx;
  tag = ((PetscObject)ctx)->tag;

  PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
  /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
  PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
  PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
  PetscCall(PetscMalloc1(nreqs, &reqs));
  /* One request array serves both directions: receives first, then sends */
  rwaits = reqs;
  swaits = reqs + nrecvs;

  /* Without the saved offset arrays and send buffer we cannot reuse; rebuild from scratch */
  if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX) {
    /* i-array */
    /*  post receives */
    if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
    for (i = 0; i < nrecvs; i++) {
      rowlen = rvalues + rstarts[i] * rbs;
      nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
      PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
    }

    /* pack the outgoing message */
    PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));

    sstartsj[0] = 0;
    rstartsj[0] = 0;
    len         = 0; /* total length of j or a array to be sent */
    if (nsends) {
      k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
      PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
    }
    /* For each destination rank, send the length of every row it will receive */
    for (i = 0; i < nsends; i++) {
      rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
      nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
      for (j = 0; j < nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l = 0; l < sbs; l++) {
          PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */

          rowlen[j * sbs + l] = ncols;

          len += ncols;
          PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
        }
        k++;
      }
      PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));

      sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
    PetscCall(PetscFree(svalues));

    /* allocate buffers for sending j and a arrays */
    PetscCall(PetscMalloc1(len + 1, &bufj));
    PetscCall(PetscMalloc1(len + 1, &bufa));

    /* create i-array of B_oth */
    PetscCall(PetscMalloc1(aBn + 2, &b_othi));

    /* Turn the received row lengths into the CSR row pointer b_othi[] and the
       per-sender offsets rstartsj[] into the incoming j/a buffers */
    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i = 0; i < nrecvs; i++) {
      rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
      nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
      for (j = 0; j < nrows; j++) {
        b_othi[k + 1] = b_othi[k] + rowlen[j];
        PetscCall(PetscIntSumError(rowlen[j], len, &len)); /* len += rowlen[j], with overflow check */
        k++;
      }
      rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }
    PetscCall(PetscFree(rvalues));

    /* allocate space for j and a arrays of B_oth */
    PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
    PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));

    /* j-array */
    /*  post receives of j-array */
    for (i = 0; i < nrecvs; i++) {
      nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
      PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
    }

    /* pack the outgoing message j-array */
    if (nsends) k = sstarts[0];
    for (i = 0; i < nsends; i++) {
      nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
      bufJ  = bufj + sstartsj[i];
      for (j = 0; j < nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll = 0; ll < sbs; ll++) {
          PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
          for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
          PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
        }
      }
      PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
    }

    /* recvs and sends of j-array are completed */
    if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
  } else if (scall == MAT_REUSE_MATRIX) {
    /* Structure is unchanged; recover the saved offsets/buffer and only refresh values */
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ *)(*B_oth)->data;
    PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
  /* NOTE(review): the message above mentions an object container, but this branch
     is reached for an unsupported MatReuse value -- consider rewording */

  /* a-array */
  /*  post receives of a-array */
  for (i = 0; i < nrecvs; i++) {
    nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
    PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
  }

  /* pack the outgoing message a-array */
  if (nsends) k = sstarts[0];
  for (i = 0; i < nsends; i++) {
    nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
    bufA  = bufa + sstartsj[i];
    for (j = 0; j < nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll = 0; ll < sbs; ll++) {
        PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
        for (l = 0; l < ncols; l++) *bufA++ = vals[l];
        PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
      }
    }
    PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
  }
  /* recvs and sends of a-array are completed */
  if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
  PetscCall(PetscFree(reqs));

  if (scall == MAT_INITIAL_MATRIX) {
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    PetscCall(PetscFree(bufj));
    /* Either save the offsets/buffer for a later MAT_REUSE_MATRIX call, or discard them */
    if (!startsj_s || !bufa_ptr) {
      PetscCall(PetscFree2(sstartsj, rstartsj));
      PetscCall(PetscFree(bufa_ptr));
    } else {
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  } else if (scall == MAT_REUSE_MATRIX) {
    PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
  }

  PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
  PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5988 
5989 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
5990 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
5991 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
5992 #if defined(PETSC_HAVE_MKL_SPARSE)
5993 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
5994 #endif
5995 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
5996 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
5997 #if defined(PETSC_HAVE_ELEMENTAL)
5998 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
5999 #endif
6000 #if defined(PETSC_HAVE_SCALAPACK)
6001 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6002 #endif
6003 #if defined(PETSC_HAVE_HYPRE)
6004 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
6005 #endif
6006 #if defined(PETSC_HAVE_CUDA)
6007 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
6008 #endif
6009 #if defined(PETSC_HAVE_HIP)
6010 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
6011 #endif
6012 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6013 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6014 #endif
6015 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6016 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6017 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6018 
6019 /*
6020     Computes (B'*A')' since computing B*A directly is untenable
6021 
6022                n                       p                          p
6023         [             ]       [             ]         [                 ]
6024       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6025         [             ]       [             ]         [                 ]
6026 
6027 */
6028 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6029 {
6030   Mat At, Bt, Ct;
6031 
6032   PetscFunctionBegin;
6033   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6034   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6035   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
6036   PetscCall(MatDestroy(&At));
6037   PetscCall(MatDestroy(&Bt));
6038   PetscCall(MatTransposeSetPrecursor(Ct, C));
6039   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6040   PetscCall(MatDestroy(&Ct));
6041   PetscFunctionReturn(PETSC_SUCCESS);
6042 }
6043 
6044 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6045 {
6046   PetscBool cisdense;
6047 
6048   PetscFunctionBegin;
6049   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6050   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6051   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6052   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6053   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6054   PetscCall(MatSetUp(C));
6055 
6056   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6057   PetscFunctionReturn(PETSC_SUCCESS);
6058 }
6059 
/* Claim the C = A*B product for an MPIDense A and MPIAIJ B by installing the symbolic routine on C */
static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, B = product->B;

  PetscFunctionBegin;
  /* A's column layout must line up with B's row layout, rank by rank */
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  /* The symbolic routine sets up C and installs the numeric routine on it */
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(PETSC_SUCCESS);
}
6072 
6073 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6074 {
6075   Mat_Product *product = C->product;
6076 
6077   PetscFunctionBegin;
6078   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6079   PetscFunctionReturn(PETSC_SUCCESS);
6080 }
6081 
6082 /*
6083    Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6084 
6085   Input Parameters:
6086 
6087     j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
6088     j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)
6089 
6090     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6091 
6092     For Set1, j1[] contains column indices of the nonzeros.
6093     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6095     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6096 
6097     Similar for Set2.
6098 
6099     This routine merges the two sets of nonzeros row by row and removes repeats.
6100 
6101   Output Parameters: (memory is allocated by the caller)
6102 
6103     i[],j[]: the CSR of the merged matrix, which has m rows.
6104     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6105     imap2[]: similar to imap1[], but for Set2.
6106     Note we order nonzeros row-by-row and from left to right.
6107 */
6108 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
6109 {
6110   PetscInt   r, m; /* Row index of mat */
6111   PetscCount t, t1, t2, b1, e1, b2, e2;
6112 
6113   PetscFunctionBegin;
6114   PetscCall(MatGetLocalSize(mat, &m, NULL));
6115   t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */
6116   i[0]        = 0;
6117   for (r = 0; r < m; r++) { /* Do row by row merging */
6118     b1 = rowBegin1[r];
6119     e1 = rowEnd1[r];
6120     b2 = rowBegin2[r];
6121     e2 = rowEnd2[r];
6122     while (b1 < e1 && b2 < e2) {
6123       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6124         j[t]      = j1[b1];
6125         imap1[t1] = t;
6126         imap2[t2] = t;
6127         b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
6128         b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
6129         t1++;
6130         t2++;
6131         t++;
6132       } else if (j1[b1] < j2[b2]) {
6133         j[t]      = j1[b1];
6134         imap1[t1] = t;
6135         b1 += jmap1[t1 + 1] - jmap1[t1];
6136         t1++;
6137         t++;
6138       } else {
6139         j[t]      = j2[b2];
6140         imap2[t2] = t;
6141         b2 += jmap2[t2 + 1] - jmap2[t2];
6142         t2++;
6143         t++;
6144       }
6145     }
6146     /* Merge the remaining in either j1[] or j2[] */
6147     while (b1 < e1) {
6148       j[t]      = j1[b1];
6149       imap1[t1] = t;
6150       b1 += jmap1[t1 + 1] - jmap1[t1];
6151       t1++;
6152       t++;
6153     }
6154     while (b2 < e2) {
6155       j[t]      = j2[b2];
6156       imap2[t2] = t;
6157       b2 += jmap2[t2 + 1] - jmap2[t2];
6158       t2++;
6159       t++;
6160     }
6161     i[r + 1] = t;
6162   }
6163   PetscFunctionReturn(PETSC_SUCCESS);
6164 }
6165 
6166 /*
6167   Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6168 
6169   Input Parameters:
6170     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6171     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6172       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6173 
6174       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6175       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6176 
6177   Output Parameters:
6178     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6179     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6180       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6181       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6182 
6183     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6184       Atot: number of entries belonging to the diagonal block.
6185       Annz: number of unique nonzeros belonging to the diagonal block.
6186       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6187         repeats (i.e., same 'i,j' pair).
6188       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6189         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6190 
6191       Atot: number of entries belonging to the diagonal block
6192       Annz: number of unique nonzeros belonging to the diagonal block.
6193 
6194     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6195 
6196     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6197 */
/* Split COO entries of a block of local rows into diagonal-block and off-diagonal-block
   subsets; see the contract spelled out in the comment block above */
static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
{
  PetscInt    cstart, cend, rstart, rend, row, col;
  PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  k, m, p, q, r, s, mid;
  PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  m = rend - rstart;

  /* Skip negative rows */
  for (k = 0; k < n; k++)
    if (i[k] >= 0) break;

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k < n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s = k; s < n; s++)
      if (i[s] != row) break;

    /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */
    for (p = k; p < s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT;
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
    }
    /* After this sort, the (shifted, negative) diag columns precede the offdiag columns */
    PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
    PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    /* Rows with no entries keep the zeroed rowBegin/rowMid/rowEnd the caller provided */
    rowBegin[row - rstart] = k;
    rowMid[row - rstart]   = mid;
    rowEnd[row - rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag row */
    for (p = k; p < mid;) {
      col = j[p]; /* col holds the still-shifted value; entries with equal shifted values are repeats */
      do {
        j[p] += PETSC_MAX_INT; /* Revert the modified diagonal indices */
        p++;
      } while (p < mid && j[p] == col);
      Annz++;
    }

    /* Count unique nonzeros of this offdiag row */
    for (p = mid; p < s;) {
      col = j[p];
      do {
        p++;
      } while (p < s && j[p] == col);
      Bnnz++;
    }
    k = s;
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot, &Aperm));
  PetscCall(PetscMalloc1(Btot, &Bperm));
  PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  /* The counters are reset and recomputed; the second pass must reproduce the first pass's totals */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
  for (r = 0; r < m; r++) {
    k   = rowBegin[r];
    mid = rowMid[r];
    s   = rowEnd[r];
    PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k));
    PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p = k; p < mid;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < mid && j[p] == col);
      Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); /* prefix sum of repeat counts */
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < s && j[p] == col);
      Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(PETSC_SUCCESS);
}
6309 
6310 /*
6311   Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6312 
6313   Input Parameters:
6314     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6315     nnz:  number of unique nonzeros in the merged matrix
6316     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6317     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6318 
6319   Output Parameter: (memory is allocated by the caller)
6320     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6321 
6322   Example:
6323     nnz1 = 4
6324     nnz  = 6
6325     imap = [1,3,4,5]
6326     jmap = [0,3,5,6,7]
6327    then,
6328     jmap_new = [0,0,3,3,5,6,7]
6329 */
6330 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6331 {
6332   PetscCount k, p;
6333 
6334   PetscFunctionBegin;
6335   jmap_new[0] = 0;
6336   p           = nnz;                /* p loops over jmap_new[] backwards */
6337   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6338     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6339   }
6340   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6341   PetscFunctionReturn(PETSC_SUCCESS);
6342 }
6343 
/* Destructor for the COO assembly scaffolding (MatCOOStruct_MPIAIJ) attached to the matrix;
   releases the communication SF, all permutation/map arrays and the send/recv buffers */
static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data)
{
  MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data;

  PetscFunctionBegin;
  PetscCall(PetscSFDestroy(&coo->sf));
  PetscCall(PetscFree(coo->Aperm1));
  PetscCall(PetscFree(coo->Bperm1));
  PetscCall(PetscFree(coo->Ajmap1));
  PetscCall(PetscFree(coo->Bjmap1));
  PetscCall(PetscFree(coo->Aimap2));
  PetscCall(PetscFree(coo->Bimap2));
  PetscCall(PetscFree(coo->Aperm2));
  PetscCall(PetscFree(coo->Bperm2));
  PetscCall(PetscFree(coo->Ajmap2));
  PetscCall(PetscFree(coo->Bjmap2));
  PetscCall(PetscFree(coo->Cperm1));
  /* sendbuf/recvbuf were obtained from one PetscMalloc2() and must go together */
  PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
  PetscCall(PetscFree(coo));
  PetscFunctionReturn(PETSC_SUCCESS);
}
6365 
6366 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6367 {
6368   MPI_Comm             comm;
6369   PetscMPIInt          rank, size;
6370   PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6371   PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
6372   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6373   PetscContainer       container;
6374   MatCOOStruct_MPIAIJ *coo;
6375 
6376   PetscFunctionBegin;
6377   PetscCall(PetscFree(mpiaij->garray));
6378   PetscCall(VecDestroy(&mpiaij->lvec));
6379 #if defined(PETSC_USE_CTABLE)
6380   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6381 #else
6382   PetscCall(PetscFree(mpiaij->colmap));
6383 #endif
6384   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6385   mat->assembled     = PETSC_FALSE;
6386   mat->was_assembled = PETSC_FALSE;
6387 
6388   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6389   PetscCallMPI(MPI_Comm_size(comm, &size));
6390   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6391   PetscCall(PetscLayoutSetUp(mat->rmap));
6392   PetscCall(PetscLayoutSetUp(mat->cmap));
6393   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6394   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6395   PetscCall(MatGetLocalSize(mat, &m, &n));
6396   PetscCall(MatGetSize(mat, &M, &N));
6397 
6398   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6399   /* entries come first, then local rows, then remote rows.                     */
6400   PetscCount n1 = coo_n, *perm1;
6401   PetscInt  *i1 = coo_i, *j1 = coo_j;
6402 
6403   PetscCall(PetscMalloc1(n1, &perm1));
6404   for (k = 0; k < n1; k++) perm1[k] = k;
6405 
6406   /* Manipulate indices so that entries with negative row or col indices will have smallest
6407      row indices, local entries will have greater but negative row indices, and remote entries
6408      will have positive row indices.
6409   */
6410   for (k = 0; k < n1; k++) {
6411     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT;                /* e.g., -2^31, minimal to move them ahead */
6412     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6413     else {
6414       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6415       if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6416     }
6417   }
6418 
6419   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6420   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6421 
6422   /* Advance k to the first entry we need to take care of */
6423   for (k = 0; k < n1; k++)
6424     if (i1[k] > PETSC_MIN_INT) break;
6425   PetscInt i1start = k;
6426 
6427   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */
6428   for (; k < rem; k++) i1[k] += PETSC_MAX_INT;                                    /* Revert row indices of local rows*/
6429 
6430   /*           Send remote rows to their owner                                  */
6431   /* Find which rows should be sent to which remote ranks*/
6432   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6433   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6434   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6435   const PetscInt *ranges;
6436   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6437 
6438   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6439   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6440   for (k = rem; k < n1;) {
6441     PetscMPIInt owner;
6442     PetscInt    firstRow, lastRow;
6443 
6444     /* Locate a row range */
6445     firstRow = i1[k]; /* first row of this owner */
6446     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6447     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6448 
6449     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6450     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6451 
6452     /* All entries in [k,p) belong to this remote owner */
6453     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6454       PetscMPIInt *sendto2;
6455       PetscInt    *nentries2;
6456       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6457 
6458       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6459       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6460       PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1));
6461       PetscCall(PetscFree2(sendto, nentries2));
6462       sendto   = sendto2;
6463       nentries = nentries2;
6464       maxNsend = maxNsend2;
6465     }
6466     sendto[nsend]   = owner;
6467     nentries[nsend] = p - k;
6468     PetscCall(PetscCountCast(p - k, &nentries[nsend]));
6469     nsend++;
6470     k = p;
6471   }
6472 
6473   /* Build 1st SF to know offsets on remote to send data */
6474   PetscSF      sf1;
6475   PetscInt     nroots = 1, nroots2 = 0;
6476   PetscInt     nleaves = nsend, nleaves2 = 0;
6477   PetscInt    *offsets;
6478   PetscSFNode *iremote;
6479 
6480   PetscCall(PetscSFCreate(comm, &sf1));
6481   PetscCall(PetscMalloc1(nsend, &iremote));
6482   PetscCall(PetscMalloc1(nsend, &offsets));
6483   for (k = 0; k < nsend; k++) {
6484     iremote[k].rank  = sendto[k];
6485     iremote[k].index = 0;
6486     nleaves2 += nentries[k];
6487     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6488   }
6489   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6490   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6491   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6492   PetscCall(PetscSFDestroy(&sf1));
6493   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem);
6494 
6495   /* Build 2nd SF to send remote COOs to their owner */
6496   PetscSF sf2;
6497   nroots  = nroots2;
6498   nleaves = nleaves2;
6499   PetscCall(PetscSFCreate(comm, &sf2));
6500   PetscCall(PetscSFSetFromOptions(sf2));
6501   PetscCall(PetscMalloc1(nleaves, &iremote));
6502   p = 0;
6503   for (k = 0; k < nsend; k++) {
6504     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6505     for (q = 0; q < nentries[k]; q++, p++) {
6506       iremote[p].rank  = sendto[k];
6507       iremote[p].index = offsets[k] + q;
6508     }
6509   }
6510   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6511 
6512   /* Send the remote COOs to their owner */
6513   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6514   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6515   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6516   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6517   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE));
6518   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6519   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE));
6520 
6521   PetscCall(PetscFree(offsets));
6522   PetscCall(PetscFree2(sendto, nentries));
6523 
6524   /* Sort received COOs by row along with the permutation array     */
6525   for (k = 0; k < n2; k++) perm2[k] = k;
6526   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6527 
6528   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6529   PetscCount *Cperm1;
6530   PetscCall(PetscMalloc1(nleaves, &Cperm1));
6531   PetscCall(PetscArraycpy(Cperm1, perm1 + rem, nleaves));
6532 
6533   /* Support for HYPRE matrices, kind of a hack.
6534      Swap min column with diagonal so that diagonal values will go first */
6535   PetscBool   hypre;
6536   const char *name;
6537   PetscCall(PetscObjectGetName((PetscObject)mat, &name));
6538   PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", name, &hypre));
6539   if (hypre) {
6540     PetscInt *minj;
6541     PetscBT   hasdiag;
6542 
6543     PetscCall(PetscBTCreate(m, &hasdiag));
6544     PetscCall(PetscMalloc1(m, &minj));
6545     for (k = 0; k < m; k++) minj[k] = PETSC_MAX_INT;
6546     for (k = i1start; k < rem; k++) {
6547       if (j1[k] < cstart || j1[k] >= cend) continue;
6548       const PetscInt rindex = i1[k] - rstart;
6549       if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6550       minj[rindex] = PetscMin(minj[rindex], j1[k]);
6551     }
6552     for (k = 0; k < n2; k++) {
6553       if (j2[k] < cstart || j2[k] >= cend) continue;
6554       const PetscInt rindex = i2[k] - rstart;
6555       if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6556       minj[rindex] = PetscMin(minj[rindex], j2[k]);
6557     }
6558     for (k = i1start; k < rem; k++) {
6559       const PetscInt rindex = i1[k] - rstart;
6560       if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6561       if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart);
6562       else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex];
6563     }
6564     for (k = 0; k < n2; k++) {
6565       const PetscInt rindex = i2[k] - rstart;
6566       if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6567       if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart);
6568       else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex];
6569     }
6570     PetscCall(PetscBTDestroy(&hasdiag));
6571     PetscCall(PetscFree(minj));
6572   }
6573 
6574   /* Split local COOs and received COOs into diag/offdiag portions */
6575   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6576   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1;
6577   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6578   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6579   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6580   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6581 
6582   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6583   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6584   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6585   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6586 
6587   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6588   PetscInt *Ai, *Bi;
6589   PetscInt *Aj, *Bj;
6590 
6591   PetscCall(PetscMalloc1(m + 1, &Ai));
6592   PetscCall(PetscMalloc1(m + 1, &Bi));
6593   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6594   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6595 
6596   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6597   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6598   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6599   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6600   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6601 
6602   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6603   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6604 
6605   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6606   /* expect nonzeros in A/B most likely have local contributing entries        */
6607   PetscInt    Annz = Ai[m];
6608   PetscInt    Bnnz = Bi[m];
6609   PetscCount *Ajmap1_new, *Bjmap1_new;
6610 
6611   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6612   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6613 
6614   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6615   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6616 
6617   PetscCall(PetscFree(Aimap1));
6618   PetscCall(PetscFree(Ajmap1));
6619   PetscCall(PetscFree(Bimap1));
6620   PetscCall(PetscFree(Bjmap1));
6621   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6622   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6623   PetscCall(PetscFree(perm1));
6624   PetscCall(PetscFree3(i2, j2, perm2));
6625 
6626   Ajmap1 = Ajmap1_new;
6627   Bjmap1 = Bjmap1_new;
6628 
6629   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6630   if (Annz < Annz1 + Annz2) {
6631     PetscInt *Aj_new;
6632     PetscCall(PetscMalloc1(Annz, &Aj_new));
6633     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6634     PetscCall(PetscFree(Aj));
6635     Aj = Aj_new;
6636   }
6637 
6638   if (Bnnz < Bnnz1 + Bnnz2) {
6639     PetscInt *Bj_new;
6640     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6641     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6642     PetscCall(PetscFree(Bj));
6643     Bj = Bj_new;
6644   }
6645 
6646   /* Create new submatrices for on-process and off-process coupling                  */
6647   PetscScalar     *Aa, *Ba;
6648   MatType          rtype;
6649   Mat_SeqAIJ      *a, *b;
6650   PetscObjectState state;
6651   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6652   PetscCall(PetscCalloc1(Bnnz, &Ba));
6653   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6654   if (cstart) {
6655     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6656   }
6657   PetscCall(MatDestroy(&mpiaij->A));
6658   PetscCall(MatDestroy(&mpiaij->B));
6659   PetscCall(MatGetRootType_Private(mat, &rtype));
6660   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6661   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6662   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6663   mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ
6664   state              = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate;
6665   PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
6666 
6667   a               = (Mat_SeqAIJ *)mpiaij->A->data;
6668   b               = (Mat_SeqAIJ *)mpiaij->B->data;
6669   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6670   a->free_a = b->free_a = PETSC_TRUE;
6671   a->free_ij = b->free_ij = PETSC_TRUE;
6672 
6673   /* conversion must happen AFTER multiply setup */
6674   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6675   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6676   PetscCall(VecDestroy(&mpiaij->lvec));
6677   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6678 
6679   // Put the COO struct in a container and then attach that to the matrix
6680   PetscCall(PetscMalloc1(1, &coo));
6681   coo->n       = coo_n;
6682   coo->sf      = sf2;
6683   coo->sendlen = nleaves;
6684   coo->recvlen = nroots;
6685   coo->Annz    = Annz;
6686   coo->Bnnz    = Bnnz;
6687   coo->Annz2   = Annz2;
6688   coo->Bnnz2   = Bnnz2;
6689   coo->Atot1   = Atot1;
6690   coo->Atot2   = Atot2;
6691   coo->Btot1   = Btot1;
6692   coo->Btot2   = Btot2;
6693   coo->Ajmap1  = Ajmap1;
6694   coo->Aperm1  = Aperm1;
6695   coo->Bjmap1  = Bjmap1;
6696   coo->Bperm1  = Bperm1;
6697   coo->Aimap2  = Aimap2;
6698   coo->Ajmap2  = Ajmap2;
6699   coo->Aperm2  = Aperm2;
6700   coo->Bimap2  = Bimap2;
6701   coo->Bjmap2  = Bjmap2;
6702   coo->Bperm2  = Bperm2;
6703   coo->Cperm1  = Cperm1;
6704   // Allocate in preallocation. If not used, it has zero cost on host
6705   PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
6706   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
6707   PetscCall(PetscContainerSetPointer(container, coo));
6708   PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ));
6709   PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
6710   PetscCall(PetscContainerDestroy(&container));
6711   PetscFunctionReturn(PETSC_SUCCESS);
6712 }
6713 
6714 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6715 {
6716   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6717   Mat                  A = mpiaij->A, B = mpiaij->B;
6718   PetscScalar         *Aa, *Ba;
6719   PetscScalar         *sendbuf, *recvbuf;
6720   const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
6721   const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
6722   const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
6723   const PetscCount    *Cperm1;
6724   PetscContainer       container;
6725   MatCOOStruct_MPIAIJ *coo;
6726 
6727   PetscFunctionBegin;
6728   PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
6729   PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix");
6730   PetscCall(PetscContainerGetPointer(container, (void **)&coo));
6731   sendbuf = coo->sendbuf;
6732   recvbuf = coo->recvbuf;
6733   Ajmap1  = coo->Ajmap1;
6734   Ajmap2  = coo->Ajmap2;
6735   Aimap2  = coo->Aimap2;
6736   Bjmap1  = coo->Bjmap1;
6737   Bjmap2  = coo->Bjmap2;
6738   Bimap2  = coo->Bimap2;
6739   Aperm1  = coo->Aperm1;
6740   Aperm2  = coo->Aperm2;
6741   Bperm1  = coo->Bperm1;
6742   Bperm2  = coo->Bperm2;
6743   Cperm1  = coo->Cperm1;
6744 
6745   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6746   PetscCall(MatSeqAIJGetArray(B, &Ba));
6747 
6748   /* Pack entries to be sent to remote */
6749   for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6750 
6751   /* Send remote entries to their owner and overlap the communication with local computation */
6752   PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6753   /* Add local entries to A and B */
6754   for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6755     PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
6756     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6757     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6758   }
6759   for (PetscCount i = 0; i < coo->Bnnz; i++) {
6760     PetscScalar sum = 0.0;
6761     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6762     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6763   }
6764   PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6765 
6766   /* Add received remote entries to A and B */
6767   for (PetscCount i = 0; i < coo->Annz2; i++) {
6768     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6769   }
6770   for (PetscCount i = 0; i < coo->Bnnz2; i++) {
6771     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6772   }
6773   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6774   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6775   PetscFunctionReturn(PETSC_SUCCESS);
6776 }
6777 
6778 /*MC
6779    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6780 
6781    Options Database Keys:
6782 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6783 
6784    Level: beginner
6785 
6786    Notes:
6787    `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values,
6788     in this case the values associated with the rows and columns one passes in are set to zero
6789     in the matrix
6790 
    `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
6793 
6794 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6795 M*/
/* MATMPIAIJ type constructor: allocates the Mat_MPIAIJ implementation struct, installs the
   function table, creates the stash for off-process entries, and registers by-name methods
   and format-conversion routines. Preallocation happens later (MatMPIAIJSetPreallocation etc.). */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  PetscCall(PetscNew(&b));
  B->data       = (void *)b;
  B->ops[0]     = MatOps_Values; /* struct copy of the static operation table */
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL; /* global-to-local column map for B; built on demand */
  b->garray      = NULL; /* global column indices of the off-diagonal block */
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* Register type-specific methods looked up by name elsewhere in PETSc */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  /* Conversions to other AIJ flavors and formats; device backends are compile-time optional */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(PETSC_SUCCESS);
}
6878 
6879 /*@C
6880   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6881   and "off-diagonal" part of the matrix in CSR format.
6882 
6883   Collective
6884 
6885   Input Parameters:
6886 + comm - MPI communicator
6887 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
6888 . n    - This value should be the same as the local size used in creating the
6889        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
6890        calculated if `N` is given) For square matrices `n` is almost always `m`.
6891 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
6892 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
6893 . i    - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6894 . j    - column indices, which must be local, i.e., based off the start column of the diagonal portion
6895 . a    - matrix values
6896 . oi   - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6897 . oj   - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
6898 - oa   - matrix values
6899 
6900   Output Parameter:
6901 . mat - the matrix
6902 
6903   Level: advanced
6904 
6905   Notes:
6906   The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6907   must free the arrays once the matrix has been destroyed and not before.
6908 
6909   The `i` and `j` indices are 0 based
6910 
6911   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix
6912 
6913   This sets local rows and cannot be used to set off-processor values.
6914 
6915   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6916   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6917   not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6918   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6919   keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6920   communication if it is known that only local entries will be set.
6921 
6922 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6923           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6924 @*/
6925 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
6926 {
6927   Mat_MPIAIJ *maij;
6928 
6929   PetscFunctionBegin;
6930   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
6931   PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
6932   PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
6933   PetscCall(MatCreate(comm, mat));
6934   PetscCall(MatSetSizes(*mat, m, n, M, N));
6935   PetscCall(MatSetType(*mat, MATMPIAIJ));
6936   maij = (Mat_MPIAIJ *)(*mat)->data;
6937 
6938   (*mat)->preallocated = PETSC_TRUE;
6939 
6940   PetscCall(PetscLayoutSetUp((*mat)->rmap));
6941   PetscCall(PetscLayoutSetUp((*mat)->cmap));
6942 
6943   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
6944   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));
6945 
6946   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
6947   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
6948   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
6949   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
6950   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
6951   PetscFunctionReturn(PETSC_SUCCESS);
6952 }
6953 
/* Product data attached to the result matrix C by the MPIAIJ "backend" MatMat machinery;
   released by MatDestroy_MatMatMPIAIJBACKEND(). */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt   cp;    /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r; /* send/receive offset arrays */
  PetscScalar *bufa;                  /* communication buffer for matrix values */
  Mat          P_oth;                 /* off-process rows of P needed locally */

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;            /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;         /* memory type the SF allocates coo_v/coo_w in */

  /* customization */
  PetscBool abmerge;    /* merge diag/off-diag of B when forming A*B */
  PetscBool P_oth_bind; /* bind P_oth storage to CPU */
} MatMatMPIAIJBACKEND;
6984 
6985 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6986 {
6987   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
6988   PetscInt             i;
6989 
6990   PetscFunctionBegin;
6991   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
6992   PetscCall(PetscFree(mmdata->bufa));
6993   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
6994   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
6995   PetscCall(MatDestroy(&mmdata->P_oth));
6996   PetscCall(MatDestroy(&mmdata->Bloc));
6997   PetscCall(PetscSFDestroy(&mmdata->sf));
6998   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
6999   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
7000   PetscCall(PetscFree(mmdata->own[0]));
7001   PetscCall(PetscFree(mmdata->own));
7002   PetscCall(PetscFree(mmdata->off[0]));
7003   PetscCall(PetscFree(mmdata->off));
7004   PetscCall(PetscFree(mmdata));
7005   PetscFunctionReturn(PETSC_SUCCESS);
7006 }
7007 
7008 /* Copy selected n entries with indices in idx[] of A to v[].
7009    If idx is NULL, copy the whole data array of A to v[]
7010  */
7011 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
7012 {
7013   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
7014 
7015   PetscFunctionBegin;
7016   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7017   if (f) {
7018     PetscCall((*f)(A, n, idx, v));
7019   } else {
7020     const PetscScalar *vv;
7021 
7022     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7023     if (n && idx) {
7024       PetscScalar    *w  = v;
7025       const PetscInt *oi = idx;
7026       PetscInt        j;
7027 
7028       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7029     } else {
7030       PetscCall(PetscArraycpy(v, vv, n));
7031     }
7032     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7033   }
7034   PetscFunctionReturn(PETSC_SUCCESS);
7035 }
7036 
/* Numeric phase of the backend MPIAIJ product: rerun the numeric phase of the
   intermediate sequential products built by MatProductSymbolic_MPIAIJBACKEND(),
   pack their values into the COO buffers, exchange off-process contributions
   through the product's PetscSF, and insert everything into C via MatSetValuesCOO(). */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o; /* running offsets into coo_v (on-process values) and coo_w (off-process send buffer) */

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  /* reusesym is set by the symbolic phase when api_user is true; only the first numeric
     call may reuse the symbolic-phase values, subsequent calls must refresh them */
  mmdata->reusesym = PETSC_FALSE;

  /* numeric phase of every intermediate sequential product */
  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* pack values of the non-temporary products: locally owned entries go to coo_v,
     entries destined for other processes go to coo_w */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i + 1] - mmdata->off[i]; /* # entries of mp[i] to be sent off-process */

    if (mmdata->mptmp[i]) continue; /* temporary product, only feeds a later intermediate */
    if (noff) {
      PetscInt nown = mmdata->own[i + 1] - mmdata->own[i]; /* # entries of mp[i] inserted locally */

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;

      /* no off-process entries for this product: copy its whole value array */
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    /* received remote values are appended after the locally computed ones, matching
       the (i,j) layout prepared by the symbolic phase */
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
  }
  PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7081 
7082 /* Support for Pt * A, A * P, or Pt * A * P */
7083 #define MAX_NUMBER_INTERMEDIATE 4
/* Symbolic phase for AB, AtB and PtAP products of MPIAIJ matrices on backends
   (CUDA/HIP/Kokkos). The parallel product is decomposed into at most
   MAX_NUMBER_INTERMEDIATE sequential products of the local diagonal/off-diagonal
   blocks (plus gathered off-process rows P_oth); their symbolic phases are run
   here, and C is preallocated through its COO interface so that the numeric
   phase only needs the sequential numeric kernels plus one PetscSF gather. */
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product           *product = C->product;
  Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ            *a, *p;
  MatMatMPIAIJBACKEND   *mmdata;
  ISLocalToGlobalMapping P_oth_l2g = NULL;
  IS                     glob      = NULL;
  const char            *prefix;
  char                   pprefix[256];
  const PetscInt        *globidx, *P_oth_idx;
  PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
  PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
  PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
                                                                                         /* a base offset; type-2: sparse with a local to global map table */
  const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */

  MatProductType ptype;
  PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
  PetscMPIInt    size;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  ptype = product->type;
  /* an AtB with symmetric A can be computed as the cheaper AB */
  if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
    ptype                                          = MATPRODUCT_AB;
    product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
  }
  /* identify the operands and the local/global sizes of C for each product type */
  switch (ptype) {
  case MATPRODUCT_AB:
    A          = product->A;
    P          = product->B;
    m          = A->rmap->n;
    n          = P->cmap->n;
    M          = A->rmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
    break;
  case MATPRODUCT_AtB:
    P          = product->A;
    A          = product->B;
    m          = P->cmap->n;
    n          = A->cmap->n;
    M          = P->cmap->N;
    N          = A->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  case MATPRODUCT_PtAP:
    A          = product->A;
    P          = product->B;
    m          = P->cmap->n;
    n          = P->cmap->n;
    M          = P->cmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
  if (size == 1) hasoffproc = PETSC_FALSE; /* uniprocessor: every row of every intermediate product is local */

  /* defaults */
  for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
    mp[i]    = NULL;
    mptmp[i] = PETSC_FALSE;
    rmapt[i] = -1;
    cmapt[i] = -1;
    rmapa[i] = NULL;
    cmapa[i] = NULL;
  }

  /* customization */
  PetscCall(PetscNew(&mmdata));
  mmdata->reusesym = product->api_user;
  if (ptype == MATPRODUCT_AB) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
      PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  } else if (ptype == MATPRODUCT_PtAP) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
      PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  }
  a = (Mat_MPIAIJ *)A->data;
  p = (Mat_MPIAIJ *)P->data;
  PetscCall(MatSetSizes(C, m, n, M, N));
  PetscCall(PetscLayoutSetUp(C->rmap));
  PetscCall(PetscLayoutSetUp(C->cmap));
  PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatGetOptionsPrefix(C, &prefix));

  /* build the chain of intermediate sequential products; for each, record how its
     local rows/cols map to global rows/cols of C (rmapt/cmapt and rmapa/cmapa) */
  cp = 0;
  switch (ptype) {
  case MATPRODUCT_AB: /* A * P */
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));

    /* A_diag * P_local (merged or not) */
    if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
      /* P is product->B */
      PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
      PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else { /* A_diag * P_diag and A_diag * P_off */
      PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 1;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = p->garray;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }

    /* A_off * P_other */
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;

  case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
    /* A is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
      PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else {
      PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = p->garray;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  case MATPRODUCT_PtAP:
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    /* P is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
    PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
    PetscCall(MatProductSetFill(mp[cp], product->fill));
    PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
    PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
    PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
    mp[cp]->product->api_user = product->api_user;
    PetscCall(MatProductSetFromOptions(mp[cp]));
    PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
    PetscCall(ISGetIndices(glob, &globidx));
    rmapt[cp] = 2;
    rmapa[cp] = globidx;
    cmapt[cp] = 2;
    cmapa[cp] = globidx;
    mptmp[cp] = PETSC_FALSE;
    cp++;
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      /* A_off * P_oth, a temporary product consumed by the next intermediate below */
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      mptmp[cp] = PETSC_TRUE;
      cp++;
      /* Bloc^t * (A_off * P_oth) */
      PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  /* sanity check */
  if (size > 1)
    for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);

  PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
  for (i = 0; i < cp; i++) {
    mmdata->mp[i]    = mp[i];
    mmdata->mptmp[i] = mptmp[i];
  }
  mmdata->cp             = cp;
  C->product->data       = mmdata;
  C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
  C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;

  /* memory type */
  mmdata->mtype = PETSC_MEMTYPE_HOST;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
  if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
  else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
  else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;

  /* prepare coo coordinates for values insertion */

  /* count total nonzeros of those intermediate seqaij Mats
    ncoo_d:    # of nonzeros of matrices that do not have offproc entries
    ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
    ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
  */
  for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
    if (mptmp[cp]) continue;
    if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
      const PetscInt *rmap = rmapa[cp];
      const PetscInt  mr   = mp[cp]->rmap->n;
      const PetscInt  rs   = C->rmap->rstart;
      const PetscInt  re   = C->rmap->rend;
      const PetscInt *ii   = mm->i;
      for (i = 0; i < mr; i++) {
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i + 1] - ii[i];
        if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
        else ncoo_oown += nz;                  /* this row is local */
      }
    } else ncoo_d += mm->nz;
  }

  /*
    ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc

    ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.

    off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].

    off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
    own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
    so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.

    coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
    Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
  */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));

  /* gather (i,j) of nonzeros inserted by remote procs */
  if (hasoffproc) {
    PetscSF  msf;
    PetscInt ncoo2, *coo_i2, *coo_j2;

    PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
    PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
    PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */

    for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
      Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
      PetscInt   *idxoff = mmdata->off[cp];
      PetscInt   *idxown = mmdata->own[cp];
      if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
        const PetscInt *rmap = rmapa[cp];
        const PetscInt *cmap = cmapa[cp];
        const PetscInt *ii   = mm->i;
        PetscInt       *coi  = coo_i + ncoo_o;
        PetscInt       *coj  = coo_j + ncoo_o;
        const PetscInt  mr   = mp[cp]->rmap->n;
        const PetscInt  rs   = C->rmap->rstart;
        const PetscInt  re   = C->rmap->rend;
        const PetscInt  cs   = C->cmap->rstart;
        for (i = 0; i < mr; i++) {
          const PetscInt *jj = mm->j + ii[i];
          const PetscInt  gr = rmap[i];
          const PetscInt  nz = ii[i + 1] - ii[i];
          if (gr < rs || gr >= re) { /* this is an offproc row */
            for (j = ii[i]; j < ii[i + 1]; j++) {
              *coi++    = gr;
              *idxoff++ = j;
            }
            if (!cmapt[cp]) { /* already global */
              for (j = 0; j < nz; j++) *coj++ = jj[j];
            } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
              for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
            } else { /* offdiag */
              for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
            }
            ncoo_o += nz;
          } else { /* this is a local row */
            for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
          }
        }
      }
      mmdata->off[cp + 1] = idxoff;
      mmdata->own[cp + 1] = idxown;
    }

    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
    PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
    PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
    ncoo = ncoo_d + ncoo_oown + ncoo2;
    PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscFree2(coo_i, coo_j));
    /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
    PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
    coo_i = coo_i2;
    coo_j = coo_j2;
  } else { /* no offproc values insertion */
    ncoo = ncoo_d;
    PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));

    /* an empty SF so the numeric phase can still call PetscSFMalloc/PetscSFFree uniformly */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(mmdata->sf));
  }
  mmdata->hasoffproc = hasoffproc;

  /* gather (i,j) of nonzeros inserted locally */
  for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
    PetscInt       *coi  = coo_i + ncoo_d;
    PetscInt       *coj  = coo_j + ncoo_d;
    const PetscInt *jj   = mm->j;
    const PetscInt *ii   = mm->i;
    const PetscInt *cmap = cmapa[cp];
    const PetscInt *rmap = rmapa[cp];
    const PetscInt  mr   = mp[cp]->rmap->n;
    const PetscInt  rs   = C->rmap->rstart;
    const PetscInt  re   = C->rmap->rend;
    const PetscInt  cs   = C->cmap->rstart;

    if (mptmp[cp]) continue;
    if (rmapt[cp] == 1) { /* consecutive rows */
      /* fill coo_i */
      for (i = 0; i < mr; i++) {
        const PetscInt gr = i + rs;
        for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
      }
      /* fill coo_j */
      if (!cmapt[cp]) { /* type-0, already global */
        PetscCall(PetscArraycpy(coj, jj, mm->nz));
      } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
        for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
      } else {                                            /* type-2, local to global for sparse columns */
        for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
      }
      ncoo_d += mm->nz;
    } else if (rmapt[cp] == 2) { /* sparse rows */
      for (i = 0; i < mr; i++) {
        const PetscInt *jj = mm->j + ii[i];
        const PetscInt  gr = rmap[i];
        const PetscInt  nz = ii[i + 1] - ii[i];
        if (gr >= rs && gr < re) { /* local rows */
          for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
          if (!cmapt[cp]) { /* type-0, already global */
            for (j = 0; j < nz; j++) *coj++ = jj[j];
          } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
            for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
          } else { /* type-2, local to global for sparse columns */
            for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
          }
          ncoo_d += nz;
        }
      }
    }
  }
  if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
  PetscCall(ISDestroy(&glob));
  if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
  PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
  /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
  PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));

  /* preallocate with COO data */
  PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
  PetscCall(PetscFree2(coo_i, coo_j));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7578 
7579 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7580 {
7581   Mat_Product *product = mat->product;
7582 #if defined(PETSC_HAVE_DEVICE)
7583   PetscBool match  = PETSC_FALSE;
7584   PetscBool usecpu = PETSC_FALSE;
7585 #else
7586   PetscBool match = PETSC_TRUE;
7587 #endif
7588 
7589   PetscFunctionBegin;
7590   MatCheckProduct(mat, 1);
7591 #if defined(PETSC_HAVE_DEVICE)
7592   if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
7593   if (match) { /* we can always fallback to the CPU if requested */
7594     switch (product->type) {
7595     case MATPRODUCT_AB:
7596       if (product->api_user) {
7597         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
7598         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7599         PetscOptionsEnd();
7600       } else {
7601         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
7602         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7603         PetscOptionsEnd();
7604       }
7605       break;
7606     case MATPRODUCT_AtB:
7607       if (product->api_user) {
7608         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
7609         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7610         PetscOptionsEnd();
7611       } else {
7612         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
7613         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7614         PetscOptionsEnd();
7615       }
7616       break;
7617     case MATPRODUCT_PtAP:
7618       if (product->api_user) {
7619         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
7620         PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7621         PetscOptionsEnd();
7622       } else {
7623         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
7624         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7625         PetscOptionsEnd();
7626       }
7627       break;
7628     default:
7629       break;
7630     }
7631     match = (PetscBool)!usecpu;
7632   }
7633 #endif
7634   if (match) {
7635     switch (product->type) {
7636     case MATPRODUCT_AB:
7637     case MATPRODUCT_AtB:
7638     case MATPRODUCT_PtAP:
7639       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7640       break;
7641     default:
7642       break;
7643     }
7644   }
7645   /* fallback to MPIAIJ ops */
7646   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7647   PetscFunctionReturn(PETSC_SUCCESS);
7648 }
7649 
7650 /*
7651    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7652 
7653    n - the number of block indices in cc[]
7654    cc - the block indices (must be large enough to contain the indices)
7655 */
7656 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7657 {
7658   PetscInt        cnt = -1, nidx, j;
7659   const PetscInt *idx;
7660 
7661   PetscFunctionBegin;
7662   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7663   if (nidx) {
7664     cnt     = 0;
7665     cc[cnt] = idx[0] / bs;
7666     for (j = 1; j < nidx; j++) {
7667       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7668     }
7669   }
7670   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7671   *n = cnt + 1;
7672   PetscFunctionReturn(PETSC_SUCCESS);
7673 }
7674 
7675 /*
7676     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7677 
7678     ncollapsed - the number of block indices
7679     collapsed - the block indices (must be large enough to contain the indices)
7680 */
7681 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7682 {
7683   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7684 
7685   PetscFunctionBegin;
7686   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7687   for (i = start + 1; i < start + bs; i++) {
7688     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7689     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7690     cprevtmp = cprev;
7691     cprev    = merged;
7692     merged   = cprevtmp;
7693   }
7694   *ncollapsed = nprev;
7695   if (collapsed) *collapsed = cprev;
7696   PetscFunctionReturn(PETSC_SUCCESS);
7697 }
7698 
7699 /*
7700  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7701 
7702  Input Parameter:
7703  . Amat - matrix
7704  - symmetrize - make the result symmetric
7705  + scale - scale with diagonal
7706 
7707  Output Parameter:
7708  . a_Gmat - output scalar graph >= 0
7709 
7710 */
7711 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat)
7712 {
7713   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7714   MPI_Comm  comm;
7715   Mat       Gmat;
7716   PetscBool ismpiaij, isseqaij;
7717   Mat       a, b, c;
7718   MatType   jtype;
7719 
7720   PetscFunctionBegin;
7721   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7722   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7723   PetscCall(MatGetSize(Amat, &MM, &NN));
7724   PetscCall(MatGetBlockSize(Amat, &bs));
7725   nloc = (Iend - Istart) / bs;
7726 
7727   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7728   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7729   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7730 
7731   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7732   /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
7733      implementation */
7734   if (bs > 1) {
7735     PetscCall(MatGetType(Amat, &jtype));
7736     PetscCall(MatCreate(comm, &Gmat));
7737     PetscCall(MatSetType(Gmat, jtype));
7738     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7739     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7740     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7741       PetscInt  *d_nnz, *o_nnz;
7742       MatScalar *aa, val, *AA;
7743       PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
7744       if (isseqaij) {
7745         a = Amat;
7746         b = NULL;
7747       } else {
7748         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7749         a             = d->A;
7750         b             = d->B;
7751       }
7752       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7753       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7754       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7755         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
7756         const PetscInt *cols1, *cols2;
7757         for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
7758           PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
7759           nnz[brow / bs] = nc2 / bs;
7760           if (nc2 % bs) ok = 0;
7761           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
7762           for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
7763             PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
7764             if (nc1 != nc2) ok = 0;
7765             else {
7766               for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
7767                 if (cols1[jj] != cols2[jj]) ok = 0;
7768                 if (cols1[jj] % bs != jj % bs) ok = 0;
7769               }
7770             }
7771             PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
7772           }
7773           PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
7774           if (!ok) {
7775             PetscCall(PetscFree2(d_nnz, o_nnz));
7776             PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
7777             goto old_bs;
7778           }
7779         }
7780       }
7781       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7782       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7783       PetscCall(PetscFree2(d_nnz, o_nnz));
7784       PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
7785       // diag
7786       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
7787         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
7788         ai               = aseq->i;
7789         n                = ai[brow + 1] - ai[brow];
7790         aj               = aseq->j + ai[brow];
7791         for (int k = 0; k < n; k += bs) {        // block columns
7792           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
7793           val        = 0;
7794           if (index_size == 0) {
7795             for (int ii = 0; ii < bs; ii++) { // rows in block
7796               aa = aseq->a + ai[brow + ii] + k;
7797               for (int jj = 0; jj < bs; jj++) {         // columns in block
7798                 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7799               }
7800             }
7801           } else {                                       // use (index,index) value if provided
7802             for (int iii = 0; iii < index_size; iii++) { // rows in block
7803               int ii = index[iii];
7804               aa     = aseq->a + ai[brow + ii] + k;
7805               for (int jjj = 0; jjj < index_size; jjj++) { // columns in block
7806                 int jj = index[jjj];
7807                 val    = PetscAbs(PetscRealPart(aa[jj]));
7808               }
7809             }
7810           }
7811           PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7812           AA[k / bs] = val;
7813         }
7814         grow = Istart / bs + brow / bs;
7815         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES));
7816       }
7817       // off-diag
7818       if (ismpiaij) {
7819         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
7820         const PetscScalar *vals;
7821         const PetscInt    *cols, *garray = aij->garray;
7822         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
7823         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
7824           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
7825           for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
7826             PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
7827             AA[k / bs] = 0;
7828             AJ[cidx]   = garray[cols[k]] / bs;
7829           }
7830           nc = ncols / bs;
7831           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
7832           if (index_size == 0) {
7833             for (int ii = 0; ii < bs; ii++) { // rows in block
7834               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7835               for (int k = 0; k < ncols; k += bs) {
7836                 for (int jj = 0; jj < bs; jj++) { // cols in block
7837                   PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7838                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7839                 }
7840               }
7841               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7842             }
7843           } else {                                       // use (index,index) value if provided
7844             for (int iii = 0; iii < index_size; iii++) { // rows in block
7845               int ii = index[iii];
7846               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7847               for (int k = 0; k < ncols; k += bs) {
7848                 for (int jjj = 0; jjj < index_size; jjj++) { // cols in block
7849                   int jj = index[jjj];
7850                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7851                 }
7852               }
7853               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7854             }
7855           }
7856           grow = Istart / bs + brow / bs;
7857           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES));
7858         }
7859       }
7860       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7861       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7862       PetscCall(PetscFree2(AA, AJ));
7863     } else {
7864       const PetscScalar *vals;
7865       const PetscInt    *idx;
7866       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
7867     old_bs:
7868       /*
7869        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7870        */
7871       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
7872       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7873       if (isseqaij) {
7874         PetscInt max_d_nnz;
7875         /*
7876          Determine exact preallocation count for (sequential) scalar matrix
7877          */
7878         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
7879         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7880         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7881         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7882         PetscCall(PetscFree3(w0, w1, w2));
7883       } else if (ismpiaij) {
7884         Mat             Daij, Oaij;
7885         const PetscInt *garray;
7886         PetscInt        max_d_nnz;
7887         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
7888         /*
7889          Determine exact preallocation count for diagonal block portion of scalar matrix
7890          */
7891         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
7892         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7893         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7894         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7895         PetscCall(PetscFree3(w0, w1, w2));
7896         /*
7897          Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
7898          */
7899         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7900           o_nnz[jj] = 0;
7901           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
7902             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7903             o_nnz[jj] += ncols;
7904             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7905           }
7906           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
7907         }
7908       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
7909       /* get scalar copy (norms) of matrix */
7910       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7911       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7912       PetscCall(PetscFree2(d_nnz, o_nnz));
7913       for (Ii = Istart; Ii < Iend; Ii++) {
7914         PetscInt dest_row = Ii / bs;
7915         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
7916         for (jj = 0; jj < ncols; jj++) {
7917           PetscInt    dest_col = idx[jj] / bs;
7918           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
7919           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
7920         }
7921         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
7922       }
7923       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7924       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7925     }
7926   } else {
7927     if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
7928     else {
7929       Gmat = Amat;
7930       PetscCall(PetscObjectReference((PetscObject)Gmat));
7931     }
7932     if (isseqaij) {
7933       a = Gmat;
7934       b = NULL;
7935     } else {
7936       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
7937       a             = d->A;
7938       b             = d->B;
7939     }
7940     if (filter >= 0 || scale) {
7941       /* take absolute value of each entry */
7942       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7943         MatInfo      info;
7944         PetscScalar *avals;
7945         PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
7946         PetscCall(MatSeqAIJGetArray(c, &avals));
7947         for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
7948         PetscCall(MatSeqAIJRestoreArray(c, &avals));
7949       }
7950     }
7951   }
7952   if (symmetrize) {
7953     PetscBool isset, issym;
7954     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
7955     if (!isset || !issym) {
7956       Mat matTrans;
7957       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
7958       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
7959       PetscCall(MatDestroy(&matTrans));
7960     }
7961     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
7962   } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
7963   if (scale) {
7964     /* scale c for all diagonal values = 1 or -1 */
7965     Vec diag;
7966     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
7967     PetscCall(MatGetDiagonal(Gmat, diag));
7968     PetscCall(VecReciprocal(diag));
7969     PetscCall(VecSqrtAbs(diag));
7970     PetscCall(MatDiagonalScale(Gmat, diag, diag));
7971     PetscCall(VecDestroy(&diag));
7972   }
7973   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
7974 
7975   if (filter >= 0) {
7976     PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
7977     PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
7978   }
7979   *a_Gmat = Gmat;
7980   PetscFunctionReturn(PETSC_SUCCESS);
7981 }
7982 
7983 /*
7984     Special version for direct calls from Fortran
7985 */
7986 #include <petsc/private/fortranimpl.h>
7987 
7988 /* Change these macros so can be used in void function */
7989 /* Identical to PetscCallVoid, except it assigns to *_ierr */
7990 #undef PetscCall
7991 #define PetscCall(...) \
7992   do { \
7993     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
7994     if (PetscUnlikely(ierr_msv_mpiaij)) { \
7995       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
7996       return; \
7997     } \
7998   } while (0)
7999 
8000 #undef SETERRQ
8001 #define SETERRQ(comm, ierr, ...) \
8002   do { \
8003     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
8004     return; \
8005   } while (0)
8006 
8007 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8008   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8009 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8010   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8011 #else
8012 #endif
/* Fortran-callable fast path for MatSetValues() on MPIAIJ matrices. All scalar arguments
   arrive by reference (Fortran convention); errors are reported through *_ierr via the
   PetscCall/SETERRQ macros redefined above, since this function must be void. */
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat = *mmat;
  PetscInt    m = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat         A     = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    PetscBool   ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B                 = aij->B;
    Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      /* negative row indices are silently skipped (standard MatSetValues semantics) */
      if (im[i] < 0) continue;
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* locally owned row: set up the binary-search state consumed by the
           MatSetValues_SeqAIJ_A_Private / _B_Private macros, for the diagonal (1) and
           off-diagonal (2) blocks respectively */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* column falls in the diagonal block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* column falls in the off-diagonal block: translate global column to local via colmap */
            if (mat->was_assembled) {
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              /* col < 0 means this global column is not yet present in B: disassemble so a
                 new nonzero can be inserted (unless new nonzeros are disallowed) */
              if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B        = aij->B;
                b        = (Mat_SeqAIJ *)B->data;
                bimax    = b->imax;
                bi       = b->i;
                bilen    = b->ilen;
                bj       = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* off-process row: stash the values for communication during assembly */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}
8126 
8127 /* Undefining these here since they were redefined from their original definition above! No
8128  * other PETSc functions should be defined past this point, as it is impossible to recover the
8129  * original definitions */
8130 #undef PetscCall
8131 #undef SETERRQ
8132