xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 2ad7e442857a3cef22c06b0e94de84654ca4e109)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
10 {
11   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
12 
13   PetscFunctionBegin;
14   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
15   PetscCall(MatStashDestroy_Private(&mat->stash));
16   PetscCall(VecDestroy(&aij->diag));
17   PetscCall(MatDestroy(&aij->A));
18   PetscCall(MatDestroy(&aij->B));
19 #if defined(PETSC_USE_CTABLE)
20   PetscCall(PetscHMapIDestroy(&aij->colmap));
21 #else
22   PetscCall(PetscFree(aij->colmap));
23 #endif
24   PetscCall(PetscFree(aij->garray));
25   PetscCall(VecDestroy(&aij->lvec));
26   PetscCall(VecScatterDestroy(&aij->Mvctx));
27   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
28   PetscCall(PetscFree(aij->ld));
29 
30   PetscCall(PetscFree(mat->data));
31 
32   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
33   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
34 
35   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
36   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
37   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
38   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
39   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
40   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
41   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
42   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
43   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
44   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
45 #if defined(PETSC_HAVE_CUDA)
46   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
47 #endif
48 #if defined(PETSC_HAVE_HIP)
49   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
50 #endif
51 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
52   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
53 #endif
54   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
55 #if defined(PETSC_HAVE_ELEMENTAL)
56   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
57 #endif
58 #if defined(PETSC_HAVE_SCALAPACK)
59   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
60 #endif
61 #if defined(PETSC_HAVE_HYPRE)
62   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
63   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
64 #endif
65   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
66   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
69   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
71 #if defined(PETSC_HAVE_MKL_SPARSE)
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
73 #endif
74   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
75   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
76   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
77   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
78   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
79   PetscFunctionReturn(PETSC_SUCCESS);
80 }
81 
82 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and  MatAssemblyEnd_MPI_Hash() */
83 #define TYPE AIJ
84 #define TYPE_AIJ
85 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
86 #undef TYPE
87 #undef TYPE_AIJ
88 
89 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
90 {
91   Mat B;
92 
93   PetscFunctionBegin;
94   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
95   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
96   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
97   PetscCall(MatDestroy(&B));
98   PetscFunctionReturn(PETSC_SUCCESS);
99 }
100 
101 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
102 {
103   Mat B;
104 
105   PetscFunctionBegin;
106   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
107   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
108   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
109   PetscFunctionReturn(PETSC_SUCCESS);
110 }
111 
112 /*MC
113    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
114 
115    This matrix type is identical to` MATSEQAIJ` when constructed with a single process communicator,
116    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
117   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
118   for communicators controlling multiple processes.  It is recommended that you call both of
119   the above preallocation routines for simplicity.
120 
121    Options Database Key:
122 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
123 
124   Developer Note:
125   Level: beginner
126 
127     Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`,and also automatically switches over to use inodes when
128    enough exist.
129 
130 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
131 M*/
132 
133 /*MC
134    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
135 
136    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
137    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
138    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
139   for communicators controlling multiple processes.  It is recommended that you call both of
140   the above preallocation routines for simplicity.
141 
142    Options Database Key:
143 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`
144 
145   Level: beginner
146 
147 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
148 M*/
149 
150 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
151 {
152   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
153 
154   PetscFunctionBegin;
155 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
156   A->boundtocpu = flg;
157 #endif
158   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
159   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
160 
161   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
162    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
163    * to differ from the parent matrix. */
164   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
165   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
166 
167   PetscFunctionReturn(PETSC_SUCCESS);
168 }
169 
170 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
171 {
172   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
173 
174   PetscFunctionBegin;
175   if (mat->A) {
176     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
177     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
178   }
179   PetscFunctionReturn(PETSC_SUCCESS);
180 }
181 
182 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
183 {
184   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
185   Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
186   Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
187   const PetscInt  *ia, *ib;
188   const MatScalar *aa, *bb, *aav, *bav;
189   PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
190   PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;
191 
192   PetscFunctionBegin;
193   *keptrows = NULL;
194 
195   ia = a->i;
196   ib = b->i;
197   PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
198   PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
199   for (i = 0; i < m; i++) {
200     na = ia[i + 1] - ia[i];
201     nb = ib[i + 1] - ib[i];
202     if (!na && !nb) {
203       cnt++;
204       goto ok1;
205     }
206     aa = aav + ia[i];
207     for (j = 0; j < na; j++) {
208       if (aa[j] != 0.0) goto ok1;
209     }
210     bb = bav ? bav + ib[i] : NULL;
211     for (j = 0; j < nb; j++) {
212       if (bb[j] != 0.0) goto ok1;
213     }
214     cnt++;
215   ok1:;
216   }
217   PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
218   if (!n0rows) {
219     PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
220     PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
221     PetscFunctionReturn(PETSC_SUCCESS);
222   }
223   PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
224   cnt = 0;
225   for (i = 0; i < m; i++) {
226     na = ia[i + 1] - ia[i];
227     nb = ib[i + 1] - ib[i];
228     if (!na && !nb) continue;
229     aa = aav + ia[i];
230     for (j = 0; j < na; j++) {
231       if (aa[j] != 0.0) {
232         rows[cnt++] = rstart + i;
233         goto ok2;
234       }
235     }
236     bb = bav ? bav + ib[i] : NULL;
237     for (j = 0; j < nb; j++) {
238       if (bb[j] != 0.0) {
239         rows[cnt++] = rstart + i;
240         goto ok2;
241       }
242     }
243   ok2:;
244   }
245   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
246   PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
247   PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
248   PetscFunctionReturn(PETSC_SUCCESS);
249 }
250 
251 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
252 {
253   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
254   PetscBool   cong;
255 
256   PetscFunctionBegin;
257   PetscCall(MatHasCongruentLayouts(Y, &cong));
258   if (Y->assembled && cong) {
259     PetscCall(MatDiagonalSet(aij->A, D, is));
260   } else {
261     PetscCall(MatDiagonalSet_Default(Y, D, is));
262   }
263   PetscFunctionReturn(PETSC_SUCCESS);
264 }
265 
266 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
267 {
268   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
269   PetscInt    i, rstart, nrows, *rows;
270 
271   PetscFunctionBegin;
272   *zrows = NULL;
273   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
274   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
275   for (i = 0; i < nrows; i++) rows[i] += rstart;
276   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
277   PetscFunctionReturn(PETSC_SUCCESS);
278 }
279 
280 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
281 {
282   Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
283   PetscInt           i, m, n, *garray = aij->garray;
284   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
285   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
286   PetscReal         *work;
287   const PetscScalar *dummy;
288 
289   PetscFunctionBegin;
290   PetscCall(MatGetSize(A, &m, &n));
291   PetscCall(PetscCalloc1(n, &work));
292   PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
293   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
294   PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
295   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
296   if (type == NORM_2) {
297     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
298     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
299   } else if (type == NORM_1) {
300     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
301     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
302   } else if (type == NORM_INFINITY) {
303     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
304     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
305   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
306     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
307     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
308   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
309     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
310     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
311   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
312   if (type == NORM_INFINITY) {
313     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
314   } else {
315     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
316   }
317   PetscCall(PetscFree(work));
318   if (type == NORM_2) {
319     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
320   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
321     for (i = 0; i < n; i++) reductions[i] /= m;
322   }
323   PetscFunctionReturn(PETSC_SUCCESS);
324 }
325 
326 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
327 {
328   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
329   IS              sis, gis;
330   const PetscInt *isis, *igis;
331   PetscInt        n, *iis, nsis, ngis, rstart, i;
332 
333   PetscFunctionBegin;
334   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
335   PetscCall(MatFindNonzeroRows(a->B, &gis));
336   PetscCall(ISGetSize(gis, &ngis));
337   PetscCall(ISGetSize(sis, &nsis));
338   PetscCall(ISGetIndices(sis, &isis));
339   PetscCall(ISGetIndices(gis, &igis));
340 
341   PetscCall(PetscMalloc1(ngis + nsis, &iis));
342   PetscCall(PetscArraycpy(iis, igis, ngis));
343   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
344   n = ngis + nsis;
345   PetscCall(PetscSortRemoveDupsInt(&n, iis));
346   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
347   for (i = 0; i < n; i++) iis[i] += rstart;
348   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
349 
350   PetscCall(ISRestoreIndices(sis, &isis));
351   PetscCall(ISRestoreIndices(gis, &igis));
352   PetscCall(ISDestroy(&sis));
353   PetscCall(ISDestroy(&gis));
354   PetscFunctionReturn(PETSC_SUCCESS);
355 }
356 
357 /*
358   Local utility routine that creates a mapping from the global column
359 number to the local number in the off-diagonal part of the local
360 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
361 a slightly higher hash table cost; without it it is not scalable (each processor
362 has an order N integer array but is fast to access.
363 */
364 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
365 {
366   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
367   PetscInt    n   = aij->B->cmap->n, i;
368 
369   PetscFunctionBegin;
370   PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
371 #if defined(PETSC_USE_CTABLE)
372   PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
373   for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
374 #else
375   PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
376   for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
377 #endif
378   PetscFunctionReturn(PETSC_SUCCESS);
379 }
380 
381 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
382   do { \
383     if (col <= lastcol1) low1 = 0; \
384     else high1 = nrow1; \
385     lastcol1 = col; \
386     while (high1 - low1 > 5) { \
387       t = (low1 + high1) / 2; \
388       if (rp1[t] > col) high1 = t; \
389       else low1 = t; \
390     } \
391     for (_i = low1; _i < high1; _i++) { \
392       if (rp1[_i] > col) break; \
393       if (rp1[_i] == col) { \
394         if (addv == ADD_VALUES) { \
395           ap1[_i] += value; \
396           /* Not sure LogFlops will slow dow the code or not */ \
397           (void)PetscLogFlops(1.0); \
398         } else ap1[_i] = value; \
399         goto a_noinsert; \
400       } \
401     } \
402     if (value == 0.0 && ignorezeroentries && row != col) { \
403       low1  = 0; \
404       high1 = nrow1; \
405       goto a_noinsert; \
406     } \
407     if (nonew == 1) { \
408       low1  = 0; \
409       high1 = nrow1; \
410       goto a_noinsert; \
411     } \
412     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
413     MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
414     N = nrow1++ - 1; \
415     a->nz++; \
416     high1++; \
417     /* shift up all the later entries in this row */ \
418     PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
419     PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
420     rp1[_i] = col; \
421     ap1[_i] = value; \
422     A->nonzerostate++; \
423   a_noinsert:; \
424     ailen[row] = nrow1; \
425   } while (0)
426 
427 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
428   do { \
429     if (col <= lastcol2) low2 = 0; \
430     else high2 = nrow2; \
431     lastcol2 = col; \
432     while (high2 - low2 > 5) { \
433       t = (low2 + high2) / 2; \
434       if (rp2[t] > col) high2 = t; \
435       else low2 = t; \
436     } \
437     for (_i = low2; _i < high2; _i++) { \
438       if (rp2[_i] > col) break; \
439       if (rp2[_i] == col) { \
440         if (addv == ADD_VALUES) { \
441           ap2[_i] += value; \
442           (void)PetscLogFlops(1.0); \
443         } else ap2[_i] = value; \
444         goto b_noinsert; \
445       } \
446     } \
447     if (value == 0.0 && ignorezeroentries) { \
448       low2  = 0; \
449       high2 = nrow2; \
450       goto b_noinsert; \
451     } \
452     if (nonew == 1) { \
453       low2  = 0; \
454       high2 = nrow2; \
455       goto b_noinsert; \
456     } \
457     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
458     MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
459     N = nrow2++ - 1; \
460     b->nz++; \
461     high2++; \
462     /* shift up all the later entries in this row */ \
463     PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
464     PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
465     rp2[_i] = col; \
466     ap2[_i] = value; \
467     B->nonzerostate++; \
468   b_noinsert:; \
469     bilen[row] = nrow2; \
470   } while (0)
471 
472 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
473 {
474   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
475   Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
476   PetscInt     l, *garray                         = mat->garray, diag;
477   PetscScalar *aa, *ba;
478 
479   PetscFunctionBegin;
480   /* code only works for square matrices A */
481 
482   /* find size of row to the left of the diagonal part */
483   PetscCall(MatGetOwnershipRange(A, &diag, NULL));
484   row = row - diag;
485   for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
486     if (garray[b->j[b->i[row] + l]] > diag) break;
487   }
488   if (l) {
489     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
490     PetscCall(PetscArraycpy(ba + b->i[row], v, l));
491     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
492   }
493 
494   /* diagonal part */
495   if (a->i[row + 1] - a->i[row]) {
496     PetscCall(MatSeqAIJGetArray(mat->A, &aa));
497     PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
498     PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
499   }
500 
501   /* right of diagonal part */
502   if (b->i[row + 1] - b->i[row] - l) {
503     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
504     PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
505     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
506   }
507   PetscFunctionReturn(PETSC_SUCCESS);
508 }
509 
510 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
511 {
512   Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
513   PetscScalar value = 0.0;
514   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
515   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
516   PetscBool   roworiented = aij->roworiented;
517 
518   /* Some Variables required in the macro */
519   Mat         A     = aij->A;
520   Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
521   PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
522   PetscBool   ignorezeroentries = a->ignorezeroentries;
523   Mat         B                 = aij->B;
524   Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
525   PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
526   MatScalar  *aa, *ba;
527   PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
528   PetscInt    nonew;
529   MatScalar  *ap1, *ap2;
530 
531   PetscFunctionBegin;
532   PetscCall(MatSeqAIJGetArray(A, &aa));
533   PetscCall(MatSeqAIJGetArray(B, &ba));
534   for (i = 0; i < m; i++) {
535     if (im[i] < 0) continue;
536     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
537     if (im[i] >= rstart && im[i] < rend) {
538       row      = im[i] - rstart;
539       lastcol1 = -1;
540       rp1      = aj ? aj + ai[row] : NULL;
541       ap1      = aa ? aa + ai[row] : NULL;
542       rmax1    = aimax[row];
543       nrow1    = ailen[row];
544       low1     = 0;
545       high1    = nrow1;
546       lastcol2 = -1;
547       rp2      = bj ? bj + bi[row] : NULL;
548       ap2      = ba ? ba + bi[row] : NULL;
549       rmax2    = bimax[row];
550       nrow2    = bilen[row];
551       low2     = 0;
552       high2    = nrow2;
553 
554       for (j = 0; j < n; j++) {
555         if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
556         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
557         if (in[j] >= cstart && in[j] < cend) {
558           col   = in[j] - cstart;
559           nonew = a->nonew;
560           MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
561         } else if (in[j] < 0) {
562           continue;
563         } else {
564           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
565           if (mat->was_assembled) {
566             if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
567 #if defined(PETSC_USE_CTABLE)
568             PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
569             col--;
570 #else
571             col = aij->colmap[in[j]] - 1;
572 #endif
573             if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
574               PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
575               col = in[j];
576               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
577               B     = aij->B;
578               b     = (Mat_SeqAIJ *)B->data;
579               bimax = b->imax;
580               bi    = b->i;
581               bilen = b->ilen;
582               bj    = b->j;
583               ba    = b->a;
584               rp2   = bj + bi[row];
585               ap2   = ba + bi[row];
586               rmax2 = bimax[row];
587               nrow2 = bilen[row];
588               low2  = 0;
589               high2 = nrow2;
590               bm    = aij->B->rmap->n;
591               ba    = b->a;
592             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
593               if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
594                 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
595               } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
596             }
597           } else col = in[j];
598           nonew = b->nonew;
599           MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
600         }
601       }
602     } else {
603       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
604       if (!aij->donotstash) {
605         mat->assembled = PETSC_FALSE;
606         if (roworiented) {
607           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v ? v + i * n : NULL, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
608         } else {
609           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v ? v + i : NULL, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
610         }
611       }
612     }
613   }
614   PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
615   PetscCall(MatSeqAIJRestoreArray(B, &ba));
616   PetscFunctionReturn(PETSC_SUCCESS);
617 }
618 
619 /*
620     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
621     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
622     No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
623 */
624 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
625 {
626   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
627   Mat         A      = aij->A; /* diagonal part of the matrix */
628   Mat         B      = aij->B; /* offdiagonal part of the matrix */
629   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
630   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
631   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
632   PetscInt   *ailen = a->ilen, *aj = a->j;
633   PetscInt   *bilen = b->ilen, *bj = b->j;
634   PetscInt    am          = aij->A->rmap->n, j;
635   PetscInt    diag_so_far = 0, dnz;
636   PetscInt    offd_so_far = 0, onz;
637 
638   PetscFunctionBegin;
639   /* Iterate over all rows of the matrix */
640   for (j = 0; j < am; j++) {
641     dnz = onz = 0;
642     /*  Iterate over all non-zero columns of the current row */
643     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
644       /* If column is in the diagonal */
645       if (mat_j[col] >= cstart && mat_j[col] < cend) {
646         aj[diag_so_far++] = mat_j[col] - cstart;
647         dnz++;
648       } else { /* off-diagonal entries */
649         bj[offd_so_far++] = mat_j[col];
650         onz++;
651       }
652     }
653     ailen[j] = dnz;
654     bilen[j] = onz;
655   }
656   PetscFunctionReturn(PETSC_SUCCESS);
657 }
658 
659 /*
660     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
661     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
662     No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
663     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
664     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
665 */
666 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
667 {
668   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
669   Mat          A    = aij->A; /* diagonal part of the matrix */
670   Mat          B    = aij->B; /* offdiagonal part of the matrix */
671   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
672   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
673   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
674   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
675   PetscInt    *ailen = a->ilen, *aj = a->j;
676   PetscInt    *bilen = b->ilen, *bj = b->j;
677   PetscInt     am          = aij->A->rmap->n, j;
678   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
679   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
680   PetscScalar *aa = a->a, *ba = b->a;
681 
682   PetscFunctionBegin;
683   /* Iterate over all rows of the matrix */
684   for (j = 0; j < am; j++) {
685     dnz_row = onz_row = 0;
686     rowstart_offd     = full_offd_i[j];
687     rowstart_diag     = full_diag_i[j];
688     /*  Iterate over all non-zero columns of the current row */
689     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
690       /* If column is in the diagonal */
691       if (mat_j[col] >= cstart && mat_j[col] < cend) {
692         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
693         aa[rowstart_diag + dnz_row] = mat_a[col];
694         dnz_row++;
695       } else { /* off-diagonal entries */
696         bj[rowstart_offd + onz_row] = mat_j[col];
697         ba[rowstart_offd + onz_row] = mat_a[col];
698         onz_row++;
699       }
700     }
701     ailen[j] = dnz_row;
702     bilen[j] = onz_row;
703   }
704   PetscFunctionReturn(PETSC_SUCCESS);
705 }
706 
707 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
708 {
709   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
710   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
711   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
712 
713   PetscFunctionBegin;
714   for (i = 0; i < m; i++) {
715     if (idxm[i] < 0) continue; /* negative row */
716     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
717     if (idxm[i] >= rstart && idxm[i] < rend) {
718       row = idxm[i] - rstart;
719       for (j = 0; j < n; j++) {
720         if (idxn[j] < 0) continue; /* negative column */
721         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
722         if (idxn[j] >= cstart && idxn[j] < cend) {
723           col = idxn[j] - cstart;
724           PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
725         } else {
726           if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
727 #if defined(PETSC_USE_CTABLE)
728           PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
729           col--;
730 #else
731           col = aij->colmap[idxn[j]] - 1;
732 #endif
733           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
734           else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
735         }
736       }
737     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
738   }
739   PetscFunctionReturn(PETSC_SUCCESS);
740 }
741 
742 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
743 {
744   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
745   PetscInt    nstash, reallocs;
746 
747   PetscFunctionBegin;
748   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
749 
750   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
751   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
752   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
753   PetscFunctionReturn(PETSC_SUCCESS);
754 }
755 
756 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
757 {
758   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
759   PetscMPIInt  n;
760   PetscInt     i, j, rstart, ncols, flg;
761   PetscInt    *row, *col;
762   PetscBool    other_disassembled;
763   PetscScalar *val;
764 
765   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
766 
767   PetscFunctionBegin;
768   if (!aij->donotstash && !mat->nooffprocentries) {
769     while (1) {
770       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
771       if (!flg) break;
772 
773       for (i = 0; i < n;) {
774         /* Now identify the consecutive vals belonging to the same row */
775         for (j = i, rstart = row[j]; j < n; j++) {
776           if (row[j] != rstart) break;
777         }
778         if (j < n) ncols = j - i;
779         else ncols = n - i;
780         /* Now assemble all these values with a single function call */
781         PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
782         i = j;
783       }
784     }
785     PetscCall(MatStashScatterEnd_Private(&mat->stash));
786   }
787 #if defined(PETSC_HAVE_DEVICE)
788   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
789   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
790   if (mat->boundtocpu) {
791     PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
792     PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
793   }
794 #endif
795   PetscCall(MatAssemblyBegin(aij->A, mode));
796   PetscCall(MatAssemblyEnd(aij->A, mode));
797 
798   /* determine if any processor has disassembled, if so we must
799      also disassemble ourself, in order that we may reassemble. */
800   /*
801      if nonzero structure of submatrix B cannot change then we know that
802      no processor disassembled thus we can skip this stuff
803   */
804   if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
805     PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
806     if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
807       PetscCall(MatDisAssemble_MPIAIJ(mat));
808     }
809   }
810   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
811   PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
812 #if defined(PETSC_HAVE_DEVICE)
813   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
814 #endif
815   PetscCall(MatAssemblyBegin(aij->B, mode));
816   PetscCall(MatAssemblyEnd(aij->B, mode));
817 
818   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
819 
820   aij->rowvalues = NULL;
821 
822   PetscCall(VecDestroy(&aij->diag));
823 
824   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
825   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
826     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
827     PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
828   }
829 #if defined(PETSC_HAVE_DEVICE)
830   mat->offloadmask = PETSC_OFFLOAD_BOTH;
831 #endif
832   PetscFunctionReturn(PETSC_SUCCESS);
833 }
834 
835 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
836 {
837   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
838 
839   PetscFunctionBegin;
840   PetscCall(MatZeroEntries(l->A));
841   PetscCall(MatZeroEntries(l->B));
842   PetscFunctionReturn(PETSC_SUCCESS);
843 }
844 
845 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
846 {
847   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)A->data;
848   PetscObjectState sA, sB;
849   PetscInt        *lrows;
850   PetscInt         r, len;
851   PetscBool        cong, lch, gch;
852 
853   PetscFunctionBegin;
854   /* get locally owned rows */
855   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
856   PetscCall(MatHasCongruentLayouts(A, &cong));
857   /* fix right hand side if needed */
858   if (x && b) {
859     const PetscScalar *xx;
860     PetscScalar       *bb;
861 
862     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
863     PetscCall(VecGetArrayRead(x, &xx));
864     PetscCall(VecGetArray(b, &bb));
865     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
866     PetscCall(VecRestoreArrayRead(x, &xx));
867     PetscCall(VecRestoreArray(b, &bb));
868   }
869 
870   sA = mat->A->nonzerostate;
871   sB = mat->B->nonzerostate;
872 
873   if (diag != 0.0 && cong) {
874     PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
875     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
876   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
877     Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
878     Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
879     PetscInt    nnwA, nnwB;
880     PetscBool   nnzA, nnzB;
881 
882     nnwA = aijA->nonew;
883     nnwB = aijB->nonew;
884     nnzA = aijA->keepnonzeropattern;
885     nnzB = aijB->keepnonzeropattern;
886     if (!nnzA) {
887       PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
888       aijA->nonew = 0;
889     }
890     if (!nnzB) {
891       PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
892       aijB->nonew = 0;
893     }
894     /* Must zero here before the next loop */
895     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
896     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
897     for (r = 0; r < len; ++r) {
898       const PetscInt row = lrows[r] + A->rmap->rstart;
899       if (row >= A->cmap->N) continue;
900       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
901     }
902     aijA->nonew = nnwA;
903     aijB->nonew = nnwB;
904   } else {
905     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
906     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
907   }
908   PetscCall(PetscFree(lrows));
909   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
910   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
911 
912   /* reduce nonzerostate */
913   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
914   PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
915   if (gch) A->nonzerostate++;
916   PetscFunctionReturn(PETSC_SUCCESS);
917 }
918 
919 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
920 {
921   Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
922   PetscMPIInt        n = A->rmap->n;
923   PetscInt           i, j, r, m, len = 0;
924   PetscInt          *lrows, *owners = A->rmap->range;
925   PetscMPIInt        p = 0;
926   PetscSFNode       *rrows;
927   PetscSF            sf;
928   const PetscScalar *xx;
929   PetscScalar       *bb, *mask, *aij_a;
930   Vec                xmask, lmask;
931   Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
932   const PetscInt    *aj, *ii, *ridx;
933   PetscScalar       *aa;
934 
935   PetscFunctionBegin;
936   /* Create SF where leaves are input rows and roots are owned rows */
937   PetscCall(PetscMalloc1(n, &lrows));
938   for (r = 0; r < n; ++r) lrows[r] = -1;
939   PetscCall(PetscMalloc1(N, &rrows));
940   for (r = 0; r < N; ++r) {
941     const PetscInt idx = rows[r];
942     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
943     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
944       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
945     }
946     rrows[r].rank  = p;
947     rrows[r].index = rows[r] - owners[p];
948   }
949   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
950   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
951   /* Collect flags for rows to be zeroed */
952   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
953   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
954   PetscCall(PetscSFDestroy(&sf));
955   /* Compress and put in row numbers */
956   for (r = 0; r < n; ++r)
957     if (lrows[r] >= 0) lrows[len++] = r;
958   /* zero diagonal part of matrix */
959   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
960   /* handle off diagonal part of matrix */
961   PetscCall(MatCreateVecs(A, &xmask, NULL));
962   PetscCall(VecDuplicate(l->lvec, &lmask));
963   PetscCall(VecGetArray(xmask, &bb));
964   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
965   PetscCall(VecRestoreArray(xmask, &bb));
966   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
967   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
968   PetscCall(VecDestroy(&xmask));
969   if (x && b) { /* this code is buggy when the row and column layout don't match */
970     PetscBool cong;
971 
972     PetscCall(MatHasCongruentLayouts(A, &cong));
973     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
974     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
975     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
976     PetscCall(VecGetArrayRead(l->lvec, &xx));
977     PetscCall(VecGetArray(b, &bb));
978   }
979   PetscCall(VecGetArray(lmask, &mask));
980   /* remove zeroed rows of off diagonal matrix */
981   PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
982   ii = aij->i;
983   for (i = 0; i < len; i++) PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - ii[lrows[i]]));
984   /* loop over all elements of off process part of matrix zeroing removed columns*/
985   if (aij->compressedrow.use) {
986     m    = aij->compressedrow.nrows;
987     ii   = aij->compressedrow.i;
988     ridx = aij->compressedrow.rindex;
989     for (i = 0; i < m; i++) {
990       n  = ii[i + 1] - ii[i];
991       aj = aij->j + ii[i];
992       aa = aij_a + ii[i];
993 
994       for (j = 0; j < n; j++) {
995         if (PetscAbsScalar(mask[*aj])) {
996           if (b) bb[*ridx] -= *aa * xx[*aj];
997           *aa = 0.0;
998         }
999         aa++;
1000         aj++;
1001       }
1002       ridx++;
1003     }
1004   } else { /* do not use compressed row format */
1005     m = l->B->rmap->n;
1006     for (i = 0; i < m; i++) {
1007       n  = ii[i + 1] - ii[i];
1008       aj = aij->j + ii[i];
1009       aa = aij_a + ii[i];
1010       for (j = 0; j < n; j++) {
1011         if (PetscAbsScalar(mask[*aj])) {
1012           if (b) bb[i] -= *aa * xx[*aj];
1013           *aa = 0.0;
1014         }
1015         aa++;
1016         aj++;
1017       }
1018     }
1019   }
1020   if (x && b) {
1021     PetscCall(VecRestoreArray(b, &bb));
1022     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1023   }
1024   PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
1025   PetscCall(VecRestoreArray(lmask, &mask));
1026   PetscCall(VecDestroy(&lmask));
1027   PetscCall(PetscFree(lrows));
1028 
1029   /* only change matrix nonzero state if pattern was allowed to be changed */
1030   if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) {
1031     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1032     PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1033   }
1034   PetscFunctionReturn(PETSC_SUCCESS);
1035 }
1036 
1037 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1038 {
1039   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1040   PetscInt    nt;
1041   VecScatter  Mvctx = a->Mvctx;
1042 
1043   PetscFunctionBegin;
1044   PetscCall(VecGetLocalSize(xx, &nt));
1045   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
1046   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1047   PetscUseTypeMethod(a->A, mult, xx, yy);
1048   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1049   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1050   PetscFunctionReturn(PETSC_SUCCESS);
1051 }
1052 
1053 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1054 {
1055   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1056 
1057   PetscFunctionBegin;
1058   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1059   PetscFunctionReturn(PETSC_SUCCESS);
1060 }
1061 
1062 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1063 {
1064   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1065   VecScatter  Mvctx = a->Mvctx;
1066 
1067   PetscFunctionBegin;
1068   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1069   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1070   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1071   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1072   PetscFunctionReturn(PETSC_SUCCESS);
1073 }
1074 
1075 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1076 {
1077   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1078 
1079   PetscFunctionBegin;
1080   /* do nondiagonal part */
1081   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1082   /* do local part */
1083   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1084   /* add partial results together */
1085   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1086   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1087   PetscFunctionReturn(PETSC_SUCCESS);
1088 }
1089 
1090 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
1091 {
1092   MPI_Comm    comm;
1093   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
1094   Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
1095   IS          Me, Notme;
1096   PetscInt    M, N, first, last, *notme, i;
1097   PetscBool   lf;
1098   PetscMPIInt size;
1099 
1100   PetscFunctionBegin;
1101   /* Easy test: symmetric diagonal block */
1102   PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
1103   PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
1104   if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
1105   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
1106   PetscCallMPI(MPI_Comm_size(comm, &size));
1107   if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);
1108 
1109   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1110   PetscCall(MatGetSize(Amat, &M, &N));
1111   PetscCall(MatGetOwnershipRange(Amat, &first, &last));
1112   PetscCall(PetscMalloc1(N - last + first, &notme));
1113   for (i = 0; i < first; i++) notme[i] = i;
1114   for (i = last; i < M; i++) notme[i - last + first] = i;
1115   PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
1116   PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
1117   PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
1118   Aoff = Aoffs[0];
1119   PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
1120   Boff = Boffs[0];
1121   PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
1122   PetscCall(MatDestroyMatrices(1, &Aoffs));
1123   PetscCall(MatDestroyMatrices(1, &Boffs));
1124   PetscCall(ISDestroy(&Me));
1125   PetscCall(ISDestroy(&Notme));
1126   PetscCall(PetscFree(notme));
1127   PetscFunctionReturn(PETSC_SUCCESS);
1128 }
1129 
1130 static PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f)
1131 {
1132   PetscFunctionBegin;
1133   PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f));
1134   PetscFunctionReturn(PETSC_SUCCESS);
1135 }
1136 
1137 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1138 {
1139   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1140 
1141   PetscFunctionBegin;
1142   /* do nondiagonal part */
1143   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1144   /* do local part */
1145   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1146   /* add partial results together */
1147   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1148   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1149   PetscFunctionReturn(PETSC_SUCCESS);
1150 }
1151 
1152 /*
1153   This only works correctly for square matrices where the subblock A->A is the
1154    diagonal block
1155 */
1156 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1157 {
1158   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1159 
1160   PetscFunctionBegin;
1161   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1162   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1163   PetscCall(MatGetDiagonal(a->A, v));
1164   PetscFunctionReturn(PETSC_SUCCESS);
1165 }
1166 
1167 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1168 {
1169   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1170 
1171   PetscFunctionBegin;
1172   PetscCall(MatScale(a->A, aa));
1173   PetscCall(MatScale(a->B, aa));
1174   PetscFunctionReturn(PETSC_SUCCESS);
1175 }
1176 
1177 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1178 {
1179   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1180   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1181   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1182   const PetscInt    *garray = aij->garray;
1183   const PetscScalar *aa, *ba;
1184   PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
1185   PetscInt64         nz, hnz;
1186   PetscInt          *rowlens;
1187   PetscInt          *colidxs;
1188   PetscScalar       *matvals;
1189   PetscMPIInt        rank;
1190 
1191   PetscFunctionBegin;
1192   PetscCall(PetscViewerSetUp(viewer));
1193 
1194   M  = mat->rmap->N;
1195   N  = mat->cmap->N;
1196   m  = mat->rmap->n;
1197   rs = mat->rmap->rstart;
1198   cs = mat->cmap->rstart;
1199   nz = A->nz + B->nz;
1200 
1201   /* write matrix header */
1202   header[0] = MAT_FILE_CLASSID;
1203   header[1] = M;
1204   header[2] = N;
1205   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1206   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1207   if (rank == 0) {
1208     if (hnz > PETSC_MAX_INT) header[3] = PETSC_MAX_INT;
1209     else header[3] = (PetscInt)hnz;
1210   }
1211   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1212 
1213   /* fill in and store row lengths  */
1214   PetscCall(PetscMalloc1(m, &rowlens));
1215   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1216   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1217   PetscCall(PetscFree(rowlens));
1218 
1219   /* fill in and store column indices */
1220   PetscCall(PetscMalloc1(nz, &colidxs));
1221   for (cnt = 0, i = 0; i < m; i++) {
1222     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1223       if (garray[B->j[jb]] > cs) break;
1224       colidxs[cnt++] = garray[B->j[jb]];
1225     }
1226     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1227     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1228   }
1229   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1230   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1231   PetscCall(PetscFree(colidxs));
1232 
1233   /* fill in and store nonzero values */
1234   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1235   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1236   PetscCall(PetscMalloc1(nz, &matvals));
1237   for (cnt = 0, i = 0; i < m; i++) {
1238     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1239       if (garray[B->j[jb]] > cs) break;
1240       matvals[cnt++] = ba[jb];
1241     }
1242     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1243     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1244   }
1245   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1246   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1247   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1248   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1249   PetscCall(PetscFree(matvals));
1250 
1251   /* write block size option to the viewer's .info file */
1252   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1253   PetscFunctionReturn(PETSC_SUCCESS);
1254 }
1255 
1256 #include <petscdraw.h>
1257 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1258 {
1259   Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
1260   PetscMPIInt       rank = aij->rank, size = aij->size;
1261   PetscBool         isdraw, iascii, isbinary;
1262   PetscViewer       sviewer;
1263   PetscViewerFormat format;
1264 
1265   PetscFunctionBegin;
1266   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1267   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1268   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1269   if (iascii) {
1270     PetscCall(PetscViewerGetFormat(viewer, &format));
1271     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1272       PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz;
1273       PetscCall(PetscMalloc1(size, &nz));
1274       PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
1275       for (i = 0; i < (PetscInt)size; i++) {
1276         nmax = PetscMax(nmax, nz[i]);
1277         nmin = PetscMin(nmin, nz[i]);
1278         navg += nz[i];
1279       }
1280       PetscCall(PetscFree(nz));
1281       navg = navg / size;
1282       PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
1283       PetscFunctionReturn(PETSC_SUCCESS);
1284     }
1285     PetscCall(PetscViewerGetFormat(viewer, &format));
1286     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1287       MatInfo   info;
1288       PetscInt *inodes = NULL;
1289 
1290       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1291       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
1292       PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
1293       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1294       if (!inodes) {
1295         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1296                                                      (double)info.memory));
1297       } else {
1298         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1299                                                      (double)info.memory));
1300       }
1301       PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
1302       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1303       PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
1304       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1305       PetscCall(PetscViewerFlush(viewer));
1306       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1307       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
1308       PetscCall(VecScatterView(aij->Mvctx, viewer));
1309       PetscFunctionReturn(PETSC_SUCCESS);
1310     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1311       PetscInt inodecount, inodelimit, *inodes;
1312       PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
1313       if (inodes) {
1314         PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
1315       } else {
1316         PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
1317       }
1318       PetscFunctionReturn(PETSC_SUCCESS);
1319     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1320       PetscFunctionReturn(PETSC_SUCCESS);
1321     }
1322   } else if (isbinary) {
1323     if (size == 1) {
1324       PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1325       PetscCall(MatView(aij->A, viewer));
1326     } else {
1327       PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
1328     }
1329     PetscFunctionReturn(PETSC_SUCCESS);
1330   } else if (iascii && size == 1) {
1331     PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1332     PetscCall(MatView(aij->A, viewer));
1333     PetscFunctionReturn(PETSC_SUCCESS);
1334   } else if (isdraw) {
1335     PetscDraw draw;
1336     PetscBool isnull;
1337     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1338     PetscCall(PetscDrawIsNull(draw, &isnull));
1339     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1340   }
1341 
1342   { /* assemble the entire matrix onto first processor */
1343     Mat A = NULL, Av;
1344     IS  isrow, iscol;
1345 
1346     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
1347     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
1348     PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
1349     PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
1350     /*  The commented code uses MatCreateSubMatrices instead */
1351     /*
1352     Mat *AA, A = NULL, Av;
1353     IS  isrow,iscol;
1354 
1355     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
1356     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
1357     PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
1358     if (rank == 0) {
1359        PetscCall(PetscObjectReference((PetscObject)AA[0]));
1360        A    = AA[0];
1361        Av   = AA[0];
1362     }
1363     PetscCall(MatDestroySubMatrices(1,&AA));
1364 */
1365     PetscCall(ISDestroy(&iscol));
1366     PetscCall(ISDestroy(&isrow));
1367     /*
1368        Everyone has to call to draw the matrix since the graphics waits are
1369        synchronized across all processors that share the PetscDraw object
1370     */
1371     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1372     if (rank == 0) {
1373       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
1374       PetscCall(MatView_SeqAIJ(Av, sviewer));
1375     }
1376     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1377     PetscCall(PetscViewerFlush(viewer));
1378     PetscCall(MatDestroy(&A));
1379   }
1380   PetscFunctionReturn(PETSC_SUCCESS);
1381 }
1382 
1383 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1384 {
1385   PetscBool iascii, isdraw, issocket, isbinary;
1386 
1387   PetscFunctionBegin;
1388   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1389   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1390   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1391   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1392   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1393   PetscFunctionReturn(PETSC_SUCCESS);
1394 }
1395 
1396 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
1397 {
1398   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
1399   Vec         bb1 = NULL;
1400   PetscBool   hasop;
1401 
1402   PetscFunctionBegin;
1403   if (flag == SOR_APPLY_UPPER) {
1404     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1405     PetscFunctionReturn(PETSC_SUCCESS);
1406   }
1407 
1408   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));
1409 
1410   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1411     if (flag & SOR_ZERO_INITIAL_GUESS) {
1412       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1413       its--;
1414     }
1415 
1416     while (its--) {
1417       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1418       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1419 
1420       /* update rhs: bb1 = bb - B*x */
1421       PetscCall(VecScale(mat->lvec, -1.0));
1422       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1423 
1424       /* local sweep */
1425       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
1426     }
1427   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1428     if (flag & SOR_ZERO_INITIAL_GUESS) {
1429       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1430       its--;
1431     }
1432     while (its--) {
1433       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1434       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1435 
1436       /* update rhs: bb1 = bb - B*x */
1437       PetscCall(VecScale(mat->lvec, -1.0));
1438       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1439 
1440       /* local sweep */
1441       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
1442     }
1443   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1444     if (flag & SOR_ZERO_INITIAL_GUESS) {
1445       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1446       its--;
1447     }
1448     while (its--) {
1449       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1450       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1451 
1452       /* update rhs: bb1 = bb - B*x */
1453       PetscCall(VecScale(mat->lvec, -1.0));
1454       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1455 
1456       /* local sweep */
1457       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
1458     }
1459   } else if (flag & SOR_EISENSTAT) {
1460     Vec xx1;
1461 
1462     PetscCall(VecDuplicate(bb, &xx1));
1463     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
1464 
1465     PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1466     PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1467     if (!mat->diag) {
1468       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
1469       PetscCall(MatGetDiagonal(matin, mat->diag));
1470     }
1471     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
1472     if (hasop) {
1473       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
1474     } else {
1475       PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
1476     }
1477     PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));
1478 
1479     PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));
1480 
1481     /* local sweep */
1482     PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
1483     PetscCall(VecAXPY(xx, 1.0, xx1));
1484     PetscCall(VecDestroy(&xx1));
1485   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");
1486 
1487   PetscCall(VecDestroy(&bb1));
1488 
1489   matin->factorerrortype = mat->A->factorerrortype;
1490   PetscFunctionReturn(PETSC_SUCCESS);
1491 }
1492 
1493 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
1494 {
1495   Mat             aA, aB, Aperm;
1496   const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
1497   PetscScalar    *aa, *ba;
1498   PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
1499   PetscSF         rowsf, sf;
1500   IS              parcolp = NULL;
1501   PetscBool       done;
1502 
1503   PetscFunctionBegin;
1504   PetscCall(MatGetLocalSize(A, &m, &n));
1505   PetscCall(ISGetIndices(rowp, &rwant));
1506   PetscCall(ISGetIndices(colp, &cwant));
1507   PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));
1508 
1509   /* Invert row permutation to find out where my rows should go */
1510   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
1511   PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
1512   PetscCall(PetscSFSetFromOptions(rowsf));
1513   for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
1514   PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1515   PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1516 
1517   /* Invert column permutation to find out where my columns should go */
1518   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1519   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
1520   PetscCall(PetscSFSetFromOptions(sf));
1521   for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
1522   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1523   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1524   PetscCall(PetscSFDestroy(&sf));
1525 
1526   PetscCall(ISRestoreIndices(rowp, &rwant));
1527   PetscCall(ISRestoreIndices(colp, &cwant));
1528   PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));
1529 
1530   /* Find out where my gcols should go */
1531   PetscCall(MatGetSize(aB, NULL, &ng));
1532   PetscCall(PetscMalloc1(ng, &gcdest));
1533   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1534   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
1535   PetscCall(PetscSFSetFromOptions(sf));
1536   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1537   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1538   PetscCall(PetscSFDestroy(&sf));
1539 
1540   PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
1541   PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1542   PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1543   for (i = 0; i < m; i++) {
1544     PetscInt    row = rdest[i];
1545     PetscMPIInt rowner;
1546     PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
1547     for (j = ai[i]; j < ai[i + 1]; j++) {
1548       PetscInt    col = cdest[aj[j]];
1549       PetscMPIInt cowner;
1550       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
1551       if (rowner == cowner) dnnz[i]++;
1552       else onnz[i]++;
1553     }
1554     for (j = bi[i]; j < bi[i + 1]; j++) {
1555       PetscInt    col = gcdest[bj[j]];
1556       PetscMPIInt cowner;
1557       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
1558       if (rowner == cowner) dnnz[i]++;
1559       else onnz[i]++;
1560     }
1561   }
1562   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1563   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1564   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1565   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1566   PetscCall(PetscSFDestroy(&rowsf));
1567 
1568   PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
1569   PetscCall(MatSeqAIJGetArray(aA, &aa));
1570   PetscCall(MatSeqAIJGetArray(aB, &ba));
1571   for (i = 0; i < m; i++) {
1572     PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
1573     PetscInt  j0, rowlen;
1574     rowlen = ai[i + 1] - ai[i];
1575     for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
1576       for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
1577       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
1578     }
1579     rowlen = bi[i + 1] - bi[i];
1580     for (j0 = j = 0; j < rowlen; j0 = j) {
1581       for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
1582       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
1583     }
1584   }
1585   PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
1586   PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
1587   PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1588   PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1589   PetscCall(MatSeqAIJRestoreArray(aA, &aa));
1590   PetscCall(MatSeqAIJRestoreArray(aB, &ba));
1591   PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
1592   PetscCall(PetscFree3(work, rdest, cdest));
1593   PetscCall(PetscFree(gcdest));
1594   if (parcolp) PetscCall(ISDestroy(&colp));
1595   *B = Aperm;
1596   PetscFunctionReturn(PETSC_SUCCESS);
1597 }
1598 
1599 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1600 {
1601   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1602 
1603   PetscFunctionBegin;
1604   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1605   if (ghosts) *ghosts = aij->garray;
1606   PetscFunctionReturn(PETSC_SUCCESS);
1607 }
1608 
1609 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1610 {
1611   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1612   Mat            A = mat->A, B = mat->B;
1613   PetscLogDouble isend[5], irecv[5];
1614 
1615   PetscFunctionBegin;
1616   info->block_size = 1.0;
1617   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1618 
1619   isend[0] = info->nz_used;
1620   isend[1] = info->nz_allocated;
1621   isend[2] = info->nz_unneeded;
1622   isend[3] = info->memory;
1623   isend[4] = info->mallocs;
1624 
1625   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1626 
1627   isend[0] += info->nz_used;
1628   isend[1] += info->nz_allocated;
1629   isend[2] += info->nz_unneeded;
1630   isend[3] += info->memory;
1631   isend[4] += info->mallocs;
1632   if (flag == MAT_LOCAL) {
1633     info->nz_used      = isend[0];
1634     info->nz_allocated = isend[1];
1635     info->nz_unneeded  = isend[2];
1636     info->memory       = isend[3];
1637     info->mallocs      = isend[4];
1638   } else if (flag == MAT_GLOBAL_MAX) {
1639     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1640 
1641     info->nz_used      = irecv[0];
1642     info->nz_allocated = irecv[1];
1643     info->nz_unneeded  = irecv[2];
1644     info->memory       = irecv[3];
1645     info->mallocs      = irecv[4];
1646   } else if (flag == MAT_GLOBAL_SUM) {
1647     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1648 
1649     info->nz_used      = irecv[0];
1650     info->nz_allocated = irecv[1];
1651     info->nz_unneeded  = irecv[2];
1652     info->memory       = irecv[3];
1653     info->mallocs      = irecv[4];
1654   }
1655   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1656   info->fill_ratio_needed = 0;
1657   info->factor_mallocs    = 0;
1658   PetscFunctionReturn(PETSC_SUCCESS);
1659 }
1660 
1661 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
1662 {
1663   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1664 
1665   PetscFunctionBegin;
1666   switch (op) {
1667   case MAT_NEW_NONZERO_LOCATIONS:
1668   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1669   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1670   case MAT_KEEP_NONZERO_PATTERN:
1671   case MAT_NEW_NONZERO_LOCATION_ERR:
1672   case MAT_USE_INODES:
1673   case MAT_IGNORE_ZERO_ENTRIES:
1674   case MAT_FORM_EXPLICIT_TRANSPOSE:
1675     MatCheckPreallocated(A, 1);
1676     PetscCall(MatSetOption(a->A, op, flg));
1677     PetscCall(MatSetOption(a->B, op, flg));
1678     break;
1679   case MAT_ROW_ORIENTED:
1680     MatCheckPreallocated(A, 1);
1681     a->roworiented = flg;
1682 
1683     PetscCall(MatSetOption(a->A, op, flg));
1684     PetscCall(MatSetOption(a->B, op, flg));
1685     break;
1686   case MAT_FORCE_DIAGONAL_ENTRIES:
1687   case MAT_SORTED_FULL:
1688     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1689     break;
1690   case MAT_IGNORE_OFF_PROC_ENTRIES:
1691     a->donotstash = flg;
1692     break;
1693   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1694   case MAT_SPD:
1695   case MAT_SYMMETRIC:
1696   case MAT_STRUCTURALLY_SYMMETRIC:
1697   case MAT_HERMITIAN:
1698   case MAT_SYMMETRY_ETERNAL:
1699   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1700   case MAT_SPD_ETERNAL:
1701     /* if the diagonal matrix is square it inherits some of the properties above */
1702     break;
1703   case MAT_SUBMAT_SINGLEIS:
1704     A->submat_singleis = flg;
1705     break;
1706   case MAT_STRUCTURE_ONLY:
1707     /* The option is handled directly by MatSetOption() */
1708     break;
1709   default:
1710     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1711   }
1712   PetscFunctionReturn(PETSC_SUCCESS);
1713 }
1714 
1715 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1716 {
1717   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
1718   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1719   PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
1720   PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
1721   PetscInt    *cmap, *idx_p;
1722 
1723   PetscFunctionBegin;
1724   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1725   mat->getrowactive = PETSC_TRUE;
1726 
1727   if (!mat->rowvalues && (idx || v)) {
1728     /*
1729         allocate enough space to hold information from the longest row.
1730     */
1731     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
1732     PetscInt    max = 1, tmp;
1733     for (i = 0; i < matin->rmap->n; i++) {
1734       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1735       if (max < tmp) max = tmp;
1736     }
1737     PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
1738   }
1739 
1740   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1741   lrow = row - rstart;
1742 
1743   pvA = &vworkA;
1744   pcA = &cworkA;
1745   pvB = &vworkB;
1746   pcB = &cworkB;
1747   if (!v) {
1748     pvA = NULL;
1749     pvB = NULL;
1750   }
1751   if (!idx) {
1752     pcA = NULL;
1753     if (!v) pcB = NULL;
1754   }
1755   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1756   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1757   nztot = nzA + nzB;
1758 
1759   cmap = mat->garray;
1760   if (v || idx) {
1761     if (nztot) {
1762       /* Sort by increasing column numbers, assuming A and B already sorted */
1763       PetscInt imark = -1;
1764       if (v) {
1765         *v = v_p = mat->rowvalues;
1766         for (i = 0; i < nzB; i++) {
1767           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1768           else break;
1769         }
1770         imark = i;
1771         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1772         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1773       }
1774       if (idx) {
1775         *idx = idx_p = mat->rowindices;
1776         if (imark > -1) {
1777           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
1778         } else {
1779           for (i = 0; i < nzB; i++) {
1780             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1781             else break;
1782           }
1783           imark = i;
1784         }
1785         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
1786         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
1787       }
1788     } else {
1789       if (idx) *idx = NULL;
1790       if (v) *v = NULL;
1791     }
1792   }
1793   *nz = nztot;
1794   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1795   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1796   PetscFunctionReturn(PETSC_SUCCESS);
1797 }
1798 
1799 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1800 {
1801   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1802 
1803   PetscFunctionBegin;
1804   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1805   aij->getrowactive = PETSC_FALSE;
1806   PetscFunctionReturn(PETSC_SUCCESS);
1807 }
1808 
1809 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
1810 {
1811   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
1812   Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
1813   PetscInt         i, j, cstart = mat->cmap->rstart;
1814   PetscReal        sum = 0.0;
1815   const MatScalar *v, *amata, *bmata;
1816 
1817   PetscFunctionBegin;
1818   if (aij->size == 1) {
1819     PetscCall(MatNorm(aij->A, type, norm));
1820   } else {
1821     PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
1822     PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
1823     if (type == NORM_FROBENIUS) {
1824       v = amata;
1825       for (i = 0; i < amat->nz; i++) {
1826         sum += PetscRealPart(PetscConj(*v) * (*v));
1827         v++;
1828       }
1829       v = bmata;
1830       for (i = 0; i < bmat->nz; i++) {
1831         sum += PetscRealPart(PetscConj(*v) * (*v));
1832         v++;
1833       }
1834       PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1835       *norm = PetscSqrtReal(*norm);
1836       PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
1837     } else if (type == NORM_1) { /* max column norm */
1838       PetscReal *tmp, *tmp2;
1839       PetscInt  *jj, *garray = aij->garray;
1840       PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
1841       PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
1842       *norm = 0.0;
1843       v     = amata;
1844       jj    = amat->j;
1845       for (j = 0; j < amat->nz; j++) {
1846         tmp[cstart + *jj++] += PetscAbsScalar(*v);
1847         v++;
1848       }
1849       v  = bmata;
1850       jj = bmat->j;
1851       for (j = 0; j < bmat->nz; j++) {
1852         tmp[garray[*jj++]] += PetscAbsScalar(*v);
1853         v++;
1854       }
1855       PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1856       for (j = 0; j < mat->cmap->N; j++) {
1857         if (tmp2[j] > *norm) *norm = tmp2[j];
1858       }
1859       PetscCall(PetscFree(tmp));
1860       PetscCall(PetscFree(tmp2));
1861       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1862     } else if (type == NORM_INFINITY) { /* max row norm */
1863       PetscReal ntemp = 0.0;
1864       for (j = 0; j < aij->A->rmap->n; j++) {
1865         v   = amata + amat->i[j];
1866         sum = 0.0;
1867         for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
1868           sum += PetscAbsScalar(*v);
1869           v++;
1870         }
1871         v = bmata + bmat->i[j];
1872         for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
1873           sum += PetscAbsScalar(*v);
1874           v++;
1875         }
1876         if (sum > ntemp) ntemp = sum;
1877       }
1878       PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
1879       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1880     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
1881     PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
1882     PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
1883   }
1884   PetscFunctionReturn(PETSC_SUCCESS);
1885 }
1886 
1887 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
1888 {
1889   Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
1890   Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
1891   PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
1892   const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
1893   Mat              B, A_diag, *B_diag;
1894   const MatScalar *pbv, *bv;
1895 
1896   PetscFunctionBegin;
1897   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1898   ma = A->rmap->n;
1899   na = A->cmap->n;
1900   mb = a->B->rmap->n;
1901   nb = a->B->cmap->n;
1902   ai = Aloc->i;
1903   aj = Aloc->j;
1904   bi = Bloc->i;
1905   bj = Bloc->j;
1906   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1907     PetscInt            *d_nnz, *g_nnz, *o_nnz;
1908     PetscSFNode         *oloc;
1909     PETSC_UNUSED PetscSF sf;
1910 
1911     PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
1912     /* compute d_nnz for preallocation */
1913     PetscCall(PetscArrayzero(d_nnz, na));
1914     for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
1915     /* compute local off-diagonal contributions */
1916     PetscCall(PetscArrayzero(g_nnz, nb));
1917     for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
1918     /* map those to global */
1919     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1920     PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
1921     PetscCall(PetscSFSetFromOptions(sf));
1922     PetscCall(PetscArrayzero(o_nnz, na));
1923     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1924     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1925     PetscCall(PetscSFDestroy(&sf));
1926 
1927     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1928     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1929     PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
1930     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1931     PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
1932     PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
1933   } else {
1934     B = *matout;
1935     PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
1936   }
1937 
1938   b           = (Mat_MPIAIJ *)B->data;
1939   A_diag      = a->A;
1940   B_diag      = &b->A;
1941   sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
1942   A_diag_ncol = A_diag->cmap->N;
1943   B_diag_ilen = sub_B_diag->ilen;
1944   B_diag_i    = sub_B_diag->i;
1945 
1946   /* Set ilen for diagonal of B */
1947   for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];
1948 
1949   /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
1950   very quickly (=without using MatSetValues), because all writes are local. */
1951   PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
1952   PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));
1953 
1954   /* copy over the B part */
1955   PetscCall(PetscMalloc1(bi[mb], &cols));
1956   PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
1957   pbv = bv;
1958   row = A->rmap->rstart;
1959   for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
1960   cols_tmp = cols;
1961   for (i = 0; i < mb; i++) {
1962     ncol = bi[i + 1] - bi[i];
1963     PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
1964     row++;
1965     if (pbv) pbv += ncol;
1966     if (cols_tmp) cols_tmp += ncol;
1967   }
1968   PetscCall(PetscFree(cols));
1969   PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));
1970 
1971   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1972   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1973   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1974     *matout = B;
1975   } else {
1976     PetscCall(MatHeaderMerge(A, &B));
1977   }
1978   PetscFunctionReturn(PETSC_SUCCESS);
1979 }
1980 
1981 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
1982 {
1983   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1984   Mat         a = aij->A, b = aij->B;
1985   PetscInt    s1, s2, s3;
1986 
1987   PetscFunctionBegin;
1988   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1989   if (rr) {
1990     PetscCall(VecGetLocalSize(rr, &s1));
1991     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1992     /* Overlap communication with computation. */
1993     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1994   }
1995   if (ll) {
1996     PetscCall(VecGetLocalSize(ll, &s1));
1997     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1998     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1999   }
2000   /* scale  the diagonal block */
2001   PetscUseTypeMethod(a, diagonalscale, ll, rr);
2002 
2003   if (rr) {
2004     /* Do a scatter end and then right scale the off-diagonal block */
2005     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
2006     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
2007   }
2008   PetscFunctionReturn(PETSC_SUCCESS);
2009 }
2010 
2011 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2012 {
2013   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2014 
2015   PetscFunctionBegin;
2016   PetscCall(MatSetUnfactored(a->A));
2017   PetscFunctionReturn(PETSC_SUCCESS);
2018 }
2019 
2020 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2021 {
2022   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2023   Mat         a, b, c, d;
2024   PetscBool   flg;
2025 
2026   PetscFunctionBegin;
2027   a = matA->A;
2028   b = matA->B;
2029   c = matB->A;
2030   d = matB->B;
2031 
2032   PetscCall(MatEqual(a, c, &flg));
2033   if (flg) PetscCall(MatEqual(b, d, &flg));
2034   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2035   PetscFunctionReturn(PETSC_SUCCESS);
2036 }
2037 
2038 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2039 {
2040   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2041   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2042 
2043   PetscFunctionBegin;
2044   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2045   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2046     /* because of the column compression in the off-processor part of the matrix a->B,
2047        the number of columns in a->B and b->B may be different, hence we cannot call
2048        the MatCopy() directly on the two parts. If need be, we can provide a more
2049        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2050        then copying the submatrices */
2051     PetscCall(MatCopy_Basic(A, B, str));
2052   } else {
2053     PetscCall(MatCopy(a->A, b->A, str));
2054     PetscCall(MatCopy(a->B, b->B, str));
2055   }
2056   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2057   PetscFunctionReturn(PETSC_SUCCESS);
2058 }
2059 
2060 /*
2061    Computes the number of nonzeros per row needed for preallocation when X and Y
2062    have different nonzero structure.
2063 */
2064 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2065 {
2066   PetscInt i, j, k, nzx, nzy;
2067 
2068   PetscFunctionBegin;
2069   /* Set the number of nonzeros in the new matrix */
2070   for (i = 0; i < m; i++) {
2071     const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i];
2072     nzx    = xi[i + 1] - xi[i];
2073     nzy    = yi[i + 1] - yi[i];
2074     nnz[i] = 0;
2075     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2076       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2077       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2078       nnz[i]++;
2079     }
2080     for (; k < nzy; k++) nnz[i]++;
2081   }
2082   PetscFunctionReturn(PETSC_SUCCESS);
2083 }
2084 
2085 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2086 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2087 {
2088   PetscInt    m = Y->rmap->N;
2089   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2090   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2091 
2092   PetscFunctionBegin;
2093   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2094   PetscFunctionReturn(PETSC_SUCCESS);
2095 }
2096 
2097 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2098 {
2099   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;
2100 
2101   PetscFunctionBegin;
2102   if (str == SAME_NONZERO_PATTERN) {
2103     PetscCall(MatAXPY(yy->A, a, xx->A, str));
2104     PetscCall(MatAXPY(yy->B, a, xx->B, str));
2105   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2106     PetscCall(MatAXPY_Basic(Y, a, X, str));
2107   } else {
2108     Mat       B;
2109     PetscInt *nnz_d, *nnz_o;
2110 
2111     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
2112     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
2113     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2114     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2115     PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
2116     PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
2117     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
2118     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
2119     PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
2120     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2121     PetscCall(MatHeaderMerge(Y, &B));
2122     PetscCall(PetscFree(nnz_d));
2123     PetscCall(PetscFree(nnz_o));
2124   }
2125   PetscFunctionReturn(PETSC_SUCCESS);
2126 }
2127 
2128 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2129 
2130 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2131 {
2132   PetscFunctionBegin;
2133   if (PetscDefined(USE_COMPLEX)) {
2134     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2135 
2136     PetscCall(MatConjugate_SeqAIJ(aij->A));
2137     PetscCall(MatConjugate_SeqAIJ(aij->B));
2138   }
2139   PetscFunctionReturn(PETSC_SUCCESS);
2140 }
2141 
2142 static PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2143 {
2144   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2145 
2146   PetscFunctionBegin;
2147   PetscCall(MatRealPart(a->A));
2148   PetscCall(MatRealPart(a->B));
2149   PetscFunctionReturn(PETSC_SUCCESS);
2150 }
2151 
2152 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2153 {
2154   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2155 
2156   PetscFunctionBegin;
2157   PetscCall(MatImaginaryPart(a->A));
2158   PetscCall(MatImaginaryPart(a->B));
2159   PetscFunctionReturn(PETSC_SUCCESS);
2160 }
2161 
2162 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2163 {
2164   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2165   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2166   PetscScalar       *va, *vv;
2167   Vec                vB, vA;
2168   const PetscScalar *vb;
2169 
2170   PetscFunctionBegin;
2171   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
2172   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2173 
2174   PetscCall(VecGetArrayWrite(vA, &va));
2175   if (idx) {
2176     for (i = 0; i < m; i++) {
2177       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2178     }
2179   }
2180 
2181   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
2182   PetscCall(PetscMalloc1(m, &idxb));
2183   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2184 
2185   PetscCall(VecGetArrayWrite(v, &vv));
2186   PetscCall(VecGetArrayRead(vB, &vb));
2187   for (i = 0; i < m; i++) {
2188     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2189       vv[i] = vb[i];
2190       if (idx) idx[i] = a->garray[idxb[i]];
2191     } else {
2192       vv[i] = va[i];
2193       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2194     }
2195   }
2196   PetscCall(VecRestoreArrayWrite(vA, &vv));
2197   PetscCall(VecRestoreArrayWrite(vA, &va));
2198   PetscCall(VecRestoreArrayRead(vB, &vb));
2199   PetscCall(PetscFree(idxb));
2200   PetscCall(VecDestroy(&vA));
2201   PetscCall(VecDestroy(&vB));
2202   PetscFunctionReturn(PETSC_SUCCESS);
2203 }
2204 
2205 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2206 {
2207   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2208   PetscInt           m = A->rmap->n, n = A->cmap->n;
2209   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2210   PetscInt          *cmap = mat->garray;
2211   PetscInt          *diagIdx, *offdiagIdx;
2212   Vec                diagV, offdiagV;
2213   PetscScalar       *a, *diagA, *offdiagA;
2214   const PetscScalar *ba, *bav;
2215   PetscInt           r, j, col, ncols, *bi, *bj;
2216   Mat                B = mat->B;
2217   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2218 
2219   PetscFunctionBegin;
2220   /* When a process holds entire A and other processes have no entry */
2221   if (A->cmap->N == n) {
2222     PetscCall(VecGetArrayWrite(v, &diagA));
2223     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2224     PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
2225     PetscCall(VecDestroy(&diagV));
2226     PetscCall(VecRestoreArrayWrite(v, &diagA));
2227     PetscFunctionReturn(PETSC_SUCCESS);
2228   } else if (n == 0) {
2229     if (m) {
2230       PetscCall(VecGetArrayWrite(v, &a));
2231       for (r = 0; r < m; r++) {
2232         a[r] = 0.0;
2233         if (idx) idx[r] = -1;
2234       }
2235       PetscCall(VecRestoreArrayWrite(v, &a));
2236     }
2237     PetscFunctionReturn(PETSC_SUCCESS);
2238   }
2239 
2240   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2241   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2242   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2243   PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));
2244 
2245   /* Get offdiagIdx[] for implicit 0.0 */
2246   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2247   ba = bav;
2248   bi = b->i;
2249   bj = b->j;
2250   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2251   for (r = 0; r < m; r++) {
2252     ncols = bi[r + 1] - bi[r];
2253     if (ncols == A->cmap->N - n) { /* Brow is dense */
2254       offdiagA[r]   = *ba;
2255       offdiagIdx[r] = cmap[0];
2256     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2257       offdiagA[r] = 0.0;
2258 
2259       /* Find first hole in the cmap */
2260       for (j = 0; j < ncols; j++) {
2261         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2262         if (col > j && j < cstart) {
2263           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2264           break;
2265         } else if (col > j + n && j >= cstart) {
2266           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2267           break;
2268         }
2269       }
2270       if (j == ncols && ncols < A->cmap->N - n) {
2271         /* a hole is outside compressed Bcols */
2272         if (ncols == 0) {
2273           if (cstart) {
2274             offdiagIdx[r] = 0;
2275           } else offdiagIdx[r] = cend;
2276         } else { /* ncols > 0 */
2277           offdiagIdx[r] = cmap[ncols - 1] + 1;
2278           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2279         }
2280       }
2281     }
2282 
2283     for (j = 0; j < ncols; j++) {
2284       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
2285         offdiagA[r]   = *ba;
2286         offdiagIdx[r] = cmap[*bj];
2287       }
2288       ba++;
2289       bj++;
2290     }
2291   }
2292 
2293   PetscCall(VecGetArrayWrite(v, &a));
2294   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2295   for (r = 0; r < m; ++r) {
2296     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2297       a[r] = diagA[r];
2298       if (idx) idx[r] = cstart + diagIdx[r];
2299     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2300       a[r] = diagA[r];
2301       if (idx) {
2302         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2303           idx[r] = cstart + diagIdx[r];
2304         } else idx[r] = offdiagIdx[r];
2305       }
2306     } else {
2307       a[r] = offdiagA[r];
2308       if (idx) idx[r] = offdiagIdx[r];
2309     }
2310   }
2311   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2312   PetscCall(VecRestoreArrayWrite(v, &a));
2313   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2314   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2315   PetscCall(VecDestroy(&diagV));
2316   PetscCall(VecDestroy(&offdiagV));
2317   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2318   PetscFunctionReturn(PETSC_SUCCESS);
2319 }
2320 
2321 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2322 {
2323   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2324   PetscInt           m = A->rmap->n, n = A->cmap->n;
2325   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2326   PetscInt          *cmap = mat->garray;
2327   PetscInt          *diagIdx, *offdiagIdx;
2328   Vec                diagV, offdiagV;
2329   PetscScalar       *a, *diagA, *offdiagA;
2330   const PetscScalar *ba, *bav;
2331   PetscInt           r, j, col, ncols, *bi, *bj;
2332   Mat                B = mat->B;
2333   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2334 
2335   PetscFunctionBegin;
2336   /* When a process holds entire A and other processes have no entry */
2337   if (A->cmap->N == n) {
2338     PetscCall(VecGetArrayWrite(v, &diagA));
2339     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2340     PetscCall(MatGetRowMin(mat->A, diagV, idx));
2341     PetscCall(VecDestroy(&diagV));
2342     PetscCall(VecRestoreArrayWrite(v, &diagA));
2343     PetscFunctionReturn(PETSC_SUCCESS);
2344   } else if (n == 0) {
2345     if (m) {
2346       PetscCall(VecGetArrayWrite(v, &a));
2347       for (r = 0; r < m; r++) {
2348         a[r] = PETSC_MAX_REAL;
2349         if (idx) idx[r] = -1;
2350       }
2351       PetscCall(VecRestoreArrayWrite(v, &a));
2352     }
2353     PetscFunctionReturn(PETSC_SUCCESS);
2354   }
2355 
2356   PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
2357   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2358   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2359   PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));
2360 
2361   /* Get offdiagIdx[] for implicit 0.0 */
2362   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2363   ba = bav;
2364   bi = b->i;
2365   bj = b->j;
2366   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2367   for (r = 0; r < m; r++) {
2368     ncols = bi[r + 1] - bi[r];
2369     if (ncols == A->cmap->N - n) { /* Brow is dense */
2370       offdiagA[r]   = *ba;
2371       offdiagIdx[r] = cmap[0];
2372     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2373       offdiagA[r] = 0.0;
2374 
2375       /* Find first hole in the cmap */
2376       for (j = 0; j < ncols; j++) {
2377         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2378         if (col > j && j < cstart) {
2379           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2380           break;
2381         } else if (col > j + n && j >= cstart) {
2382           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2383           break;
2384         }
2385       }
2386       if (j == ncols && ncols < A->cmap->N - n) {
2387         /* a hole is outside compressed Bcols */
2388         if (ncols == 0) {
2389           if (cstart) {
2390             offdiagIdx[r] = 0;
2391           } else offdiagIdx[r] = cend;
2392         } else { /* ncols > 0 */
2393           offdiagIdx[r] = cmap[ncols - 1] + 1;
2394           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2395         }
2396       }
2397     }
2398 
2399     for (j = 0; j < ncols; j++) {
2400       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
2401         offdiagA[r]   = *ba;
2402         offdiagIdx[r] = cmap[*bj];
2403       }
2404       ba++;
2405       bj++;
2406     }
2407   }
2408 
2409   PetscCall(VecGetArrayWrite(v, &a));
2410   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2411   for (r = 0; r < m; ++r) {
2412     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2413       a[r] = diagA[r];
2414       if (idx) idx[r] = cstart + diagIdx[r];
2415     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2416       a[r] = diagA[r];
2417       if (idx) {
2418         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2419           idx[r] = cstart + diagIdx[r];
2420         } else idx[r] = offdiagIdx[r];
2421       }
2422     } else {
2423       a[r] = offdiagA[r];
2424       if (idx) idx[r] = offdiagIdx[r];
2425     }
2426   }
2427   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2428   PetscCall(VecRestoreArrayWrite(v, &a));
2429   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2430   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2431   PetscCall(VecDestroy(&diagV));
2432   PetscCall(VecDestroy(&offdiagV));
2433   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2434   PetscFunctionReturn(PETSC_SUCCESS);
2435 }
2436 
2437 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2438 {
2439   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2440   PetscInt           m = A->rmap->n, n = A->cmap->n;
2441   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2442   PetscInt          *cmap = mat->garray;
2443   PetscInt          *diagIdx, *offdiagIdx;
2444   Vec                diagV, offdiagV;
2445   PetscScalar       *a, *diagA, *offdiagA;
2446   const PetscScalar *ba, *bav;
2447   PetscInt           r, j, col, ncols, *bi, *bj;
2448   Mat                B = mat->B;
2449   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2450 
2451   PetscFunctionBegin;
2452   /* When a process holds entire A and other processes have no entry */
2453   if (A->cmap->N == n) {
2454     PetscCall(VecGetArrayWrite(v, &diagA));
2455     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2456     PetscCall(MatGetRowMax(mat->A, diagV, idx));
2457     PetscCall(VecDestroy(&diagV));
2458     PetscCall(VecRestoreArrayWrite(v, &diagA));
2459     PetscFunctionReturn(PETSC_SUCCESS);
2460   } else if (n == 0) {
2461     if (m) {
2462       PetscCall(VecGetArrayWrite(v, &a));
2463       for (r = 0; r < m; r++) {
2464         a[r] = PETSC_MIN_REAL;
2465         if (idx) idx[r] = -1;
2466       }
2467       PetscCall(VecRestoreArrayWrite(v, &a));
2468     }
2469     PetscFunctionReturn(PETSC_SUCCESS);
2470   }
2471 
2472   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2473   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2474   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2475   PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));
2476 
2477   /* Get offdiagIdx[] for implicit 0.0 */
2478   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2479   ba = bav;
2480   bi = b->i;
2481   bj = b->j;
2482   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2483   for (r = 0; r < m; r++) {
2484     ncols = bi[r + 1] - bi[r];
2485     if (ncols == A->cmap->N - n) { /* Brow is dense */
2486       offdiagA[r]   = *ba;
2487       offdiagIdx[r] = cmap[0];
2488     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2489       offdiagA[r] = 0.0;
2490 
2491       /* Find first hole in the cmap */
2492       for (j = 0; j < ncols; j++) {
2493         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2494         if (col > j && j < cstart) {
2495           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2496           break;
2497         } else if (col > j + n && j >= cstart) {
2498           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2499           break;
2500         }
2501       }
2502       if (j == ncols && ncols < A->cmap->N - n) {
2503         /* a hole is outside compressed Bcols */
2504         if (ncols == 0) {
2505           if (cstart) {
2506             offdiagIdx[r] = 0;
2507           } else offdiagIdx[r] = cend;
2508         } else { /* ncols > 0 */
2509           offdiagIdx[r] = cmap[ncols - 1] + 1;
2510           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2511         }
2512       }
2513     }
2514 
2515     for (j = 0; j < ncols; j++) {
2516       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
2517         offdiagA[r]   = *ba;
2518         offdiagIdx[r] = cmap[*bj];
2519       }
2520       ba++;
2521       bj++;
2522     }
2523   }
2524 
2525   PetscCall(VecGetArrayWrite(v, &a));
2526   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2527   for (r = 0; r < m; ++r) {
2528     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2529       a[r] = diagA[r];
2530       if (idx) idx[r] = cstart + diagIdx[r];
2531     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2532       a[r] = diagA[r];
2533       if (idx) {
2534         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2535           idx[r] = cstart + diagIdx[r];
2536         } else idx[r] = offdiagIdx[r];
2537       }
2538     } else {
2539       a[r] = offdiagA[r];
2540       if (idx) idx[r] = offdiagIdx[r];
2541     }
2542   }
2543   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2544   PetscCall(VecRestoreArrayWrite(v, &a));
2545   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2546   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2547   PetscCall(VecDestroy(&diagV));
2548   PetscCall(VecDestroy(&offdiagV));
2549   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2550   PetscFunctionReturn(PETSC_SUCCESS);
2551 }
2552 
2553 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2554 {
2555   Mat *dummy;
2556 
2557   PetscFunctionBegin;
2558   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2559   *newmat = *dummy;
2560   PetscCall(PetscFree(dummy));
2561   PetscFunctionReturn(PETSC_SUCCESS);
2562 }
2563 
2564 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2565 {
2566   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2567 
2568   PetscFunctionBegin;
2569   PetscCall(MatInvertBlockDiagonal(a->A, values));
2570   A->factorerrortype = a->A->factorerrortype;
2571   PetscFunctionReturn(PETSC_SUCCESS);
2572 }
2573 
2574 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2575 {
2576   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2577 
2578   PetscFunctionBegin;
2579   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2580   PetscCall(MatSetRandom(aij->A, rctx));
2581   if (x->assembled) {
2582     PetscCall(MatSetRandom(aij->B, rctx));
2583   } else {
2584     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2585   }
2586   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2587   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2588   PetscFunctionReturn(PETSC_SUCCESS);
2589 }
2590 
2591 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2592 {
2593   PetscFunctionBegin;
2594   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2595   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2596   PetscFunctionReturn(PETSC_SUCCESS);
2597 }
2598 
2599 /*@
2600   MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2601 
2602   Not Collective
2603 
2604   Input Parameter:
2605 . A - the matrix
2606 
2607   Output Parameter:
2608 . nz - the number of nonzeros
2609 
2610   Level: advanced
2611 
2612 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2613 @*/
2614 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2615 {
2616   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2617   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2618   PetscBool   isaij;
2619 
2620   PetscFunctionBegin;
2621   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
2622   PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
2623   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2624   PetscFunctionReturn(PETSC_SUCCESS);
2625 }
2626 
2627 /*@
2628   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2629 
2630   Collective
2631 
2632   Input Parameters:
2633 + A  - the matrix
2634 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)
2635 
2636   Level: advanced
2637 
2638 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2639 @*/
2640 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
2641 {
2642   PetscFunctionBegin;
2643   PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
2644   PetscFunctionReturn(PETSC_SUCCESS);
2645 }
2646 
2647 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
2648 {
2649   PetscBool sc = PETSC_FALSE, flg;
2650 
2651   PetscFunctionBegin;
2652   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2653   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2654   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2655   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2656   PetscOptionsHeadEnd();
2657   PetscFunctionReturn(PETSC_SUCCESS);
2658 }
2659 
2660 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
2661 {
2662   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
2663   Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;
2664 
2665   PetscFunctionBegin;
2666   if (!Y->preallocated) {
2667     PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
2668   } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
2669     PetscInt nonew = aij->nonew;
2670     PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
2671     aij->nonew = nonew;
2672   }
2673   PetscCall(MatShift_Basic(Y, a));
2674   PetscFunctionReturn(PETSC_SUCCESS);
2675 }
2676 
2677 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2678 {
2679   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2680 
2681   PetscFunctionBegin;
2682   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2683   PetscCall(MatMissingDiagonal(a->A, missing, d));
2684   if (d) {
2685     PetscInt rstart;
2686     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2687     *d += rstart;
2688   }
2689   PetscFunctionReturn(PETSC_SUCCESS);
2690 }
2691 
2692 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2693 {
2694   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2695 
2696   PetscFunctionBegin;
2697   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2698   PetscFunctionReturn(PETSC_SUCCESS);
2699 }
2700 
2701 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
2702 {
2703   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2704 
2705   PetscFunctionBegin;
2706   PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2707   PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2708   PetscFunctionReturn(PETSC_SUCCESS);
2709 }
2710 
2711 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2712                                        MatGetRow_MPIAIJ,
2713                                        MatRestoreRow_MPIAIJ,
2714                                        MatMult_MPIAIJ,
2715                                        /* 4*/ MatMultAdd_MPIAIJ,
2716                                        MatMultTranspose_MPIAIJ,
2717                                        MatMultTransposeAdd_MPIAIJ,
2718                                        NULL,
2719                                        NULL,
2720                                        NULL,
2721                                        /*10*/ NULL,
2722                                        NULL,
2723                                        NULL,
2724                                        MatSOR_MPIAIJ,
2725                                        MatTranspose_MPIAIJ,
2726                                        /*15*/ MatGetInfo_MPIAIJ,
2727                                        MatEqual_MPIAIJ,
2728                                        MatGetDiagonal_MPIAIJ,
2729                                        MatDiagonalScale_MPIAIJ,
2730                                        MatNorm_MPIAIJ,
2731                                        /*20*/ MatAssemblyBegin_MPIAIJ,
2732                                        MatAssemblyEnd_MPIAIJ,
2733                                        MatSetOption_MPIAIJ,
2734                                        MatZeroEntries_MPIAIJ,
2735                                        /*24*/ MatZeroRows_MPIAIJ,
2736                                        NULL,
2737                                        NULL,
2738                                        NULL,
2739                                        NULL,
2740                                        /*29*/ MatSetUp_MPI_Hash,
2741                                        NULL,
2742                                        NULL,
2743                                        MatGetDiagonalBlock_MPIAIJ,
2744                                        NULL,
2745                                        /*34*/ MatDuplicate_MPIAIJ,
2746                                        NULL,
2747                                        NULL,
2748                                        NULL,
2749                                        NULL,
2750                                        /*39*/ MatAXPY_MPIAIJ,
2751                                        MatCreateSubMatrices_MPIAIJ,
2752                                        MatIncreaseOverlap_MPIAIJ,
2753                                        MatGetValues_MPIAIJ,
2754                                        MatCopy_MPIAIJ,
2755                                        /*44*/ MatGetRowMax_MPIAIJ,
2756                                        MatScale_MPIAIJ,
2757                                        MatShift_MPIAIJ,
2758                                        MatDiagonalSet_MPIAIJ,
2759                                        MatZeroRowsColumns_MPIAIJ,
2760                                        /*49*/ MatSetRandom_MPIAIJ,
2761                                        MatGetRowIJ_MPIAIJ,
2762                                        MatRestoreRowIJ_MPIAIJ,
2763                                        NULL,
2764                                        NULL,
2765                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2766                                        NULL,
2767                                        MatSetUnfactored_MPIAIJ,
2768                                        MatPermute_MPIAIJ,
2769                                        NULL,
2770                                        /*59*/ MatCreateSubMatrix_MPIAIJ,
2771                                        MatDestroy_MPIAIJ,
2772                                        MatView_MPIAIJ,
2773                                        NULL,
2774                                        NULL,
2775                                        /*64*/ NULL,
2776                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2777                                        NULL,
2778                                        NULL,
2779                                        NULL,
2780                                        /*69*/ MatGetRowMaxAbs_MPIAIJ,
2781                                        MatGetRowMinAbs_MPIAIJ,
2782                                        NULL,
2783                                        NULL,
2784                                        NULL,
2785                                        NULL,
2786                                        /*75*/ MatFDColoringApply_AIJ,
2787                                        MatSetFromOptions_MPIAIJ,
2788                                        NULL,
2789                                        NULL,
2790                                        MatFindZeroDiagonals_MPIAIJ,
2791                                        /*80*/ NULL,
2792                                        NULL,
2793                                        NULL,
2794                                        /*83*/ MatLoad_MPIAIJ,
2795                                        MatIsSymmetric_MPIAIJ,
2796                                        NULL,
2797                                        NULL,
2798                                        NULL,
2799                                        NULL,
2800                                        /*89*/ NULL,
2801                                        NULL,
2802                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2803                                        NULL,
2804                                        NULL,
2805                                        /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2806                                        NULL,
2807                                        NULL,
2808                                        NULL,
2809                                        MatBindToCPU_MPIAIJ,
2810                                        /*99*/ MatProductSetFromOptions_MPIAIJ,
2811                                        NULL,
2812                                        NULL,
2813                                        MatConjugate_MPIAIJ,
2814                                        NULL,
2815                                        /*104*/ MatSetValuesRow_MPIAIJ,
2816                                        MatRealPart_MPIAIJ,
2817                                        MatImaginaryPart_MPIAIJ,
2818                                        NULL,
2819                                        NULL,
2820                                        /*109*/ NULL,
2821                                        NULL,
2822                                        MatGetRowMin_MPIAIJ,
2823                                        NULL,
2824                                        MatMissingDiagonal_MPIAIJ,
2825                                        /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
2826                                        NULL,
2827                                        MatGetGhosts_MPIAIJ,
2828                                        NULL,
2829                                        NULL,
2830                                        /*119*/ MatMultDiagonalBlock_MPIAIJ,
2831                                        NULL,
2832                                        NULL,
2833                                        NULL,
2834                                        MatGetMultiProcBlock_MPIAIJ,
2835                                        /*124*/ MatFindNonzeroRows_MPIAIJ,
2836                                        MatGetColumnReductions_MPIAIJ,
2837                                        MatInvertBlockDiagonal_MPIAIJ,
2838                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2839                                        MatCreateSubMatricesMPI_MPIAIJ,
2840                                        /*129*/ NULL,
2841                                        NULL,
2842                                        NULL,
2843                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2844                                        NULL,
2845                                        /*134*/ NULL,
2846                                        NULL,
2847                                        NULL,
2848                                        NULL,
2849                                        NULL,
2850                                        /*139*/ MatSetBlockSizes_MPIAIJ,
2851                                        NULL,
2852                                        NULL,
2853                                        MatFDColoringSetUp_MPIXAIJ,
2854                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2855                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2856                                        /*145*/ NULL,
2857                                        NULL,
2858                                        NULL,
2859                                        MatCreateGraph_Simple_AIJ,
2860                                        NULL,
2861                                        /*150*/ NULL,
2862                                        MatEliminateZeros_MPIAIJ};
2863 
2864 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2865 {
2866   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2867 
2868   PetscFunctionBegin;
2869   PetscCall(MatStoreValues(aij->A));
2870   PetscCall(MatStoreValues(aij->B));
2871   PetscFunctionReturn(PETSC_SUCCESS);
2872 }
2873 
2874 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2875 {
2876   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2877 
2878   PetscFunctionBegin;
2879   PetscCall(MatRetrieveValues(aij->A));
2880   PetscCall(MatRetrieveValues(aij->B));
2881   PetscFunctionReturn(PETSC_SUCCESS);
2882 }
2883 
2884 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2885 {
2886   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2887   PetscMPIInt size;
2888 
2889   PetscFunctionBegin;
2890   if (B->hash_active) {
2891     B->ops[0]      = b->cops;
2892     B->hash_active = PETSC_FALSE;
2893   }
2894   PetscCall(PetscLayoutSetUp(B->rmap));
2895   PetscCall(PetscLayoutSetUp(B->cmap));
2896 
2897 #if defined(PETSC_USE_CTABLE)
2898   PetscCall(PetscHMapIDestroy(&b->colmap));
2899 #else
2900   PetscCall(PetscFree(b->colmap));
2901 #endif
2902   PetscCall(PetscFree(b->garray));
2903   PetscCall(VecDestroy(&b->lvec));
2904   PetscCall(VecScatterDestroy(&b->Mvctx));
2905 
2906   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2907   PetscCall(MatDestroy(&b->B));
2908   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2909   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2910   PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
2911   PetscCall(MatSetType(b->B, MATSEQAIJ));
2912 
2913   PetscCall(MatDestroy(&b->A));
2914   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2915   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2916   PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
2917   PetscCall(MatSetType(b->A, MATSEQAIJ));
2918 
2919   PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
2920   PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
2921   B->preallocated  = PETSC_TRUE;
2922   B->was_assembled = PETSC_FALSE;
2923   B->assembled     = PETSC_FALSE;
2924   PetscFunctionReturn(PETSC_SUCCESS);
2925 }
2926 
2927 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2928 {
2929   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2930 
2931   PetscFunctionBegin;
2932   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2933   PetscCall(PetscLayoutSetUp(B->rmap));
2934   PetscCall(PetscLayoutSetUp(B->cmap));
2935 
2936 #if defined(PETSC_USE_CTABLE)
2937   PetscCall(PetscHMapIDestroy(&b->colmap));
2938 #else
2939   PetscCall(PetscFree(b->colmap));
2940 #endif
2941   PetscCall(PetscFree(b->garray));
2942   PetscCall(VecDestroy(&b->lvec));
2943   PetscCall(VecScatterDestroy(&b->Mvctx));
2944 
2945   PetscCall(MatResetPreallocation(b->A));
2946   PetscCall(MatResetPreallocation(b->B));
2947   B->preallocated  = PETSC_TRUE;
2948   B->was_assembled = PETSC_FALSE;
2949   B->assembled     = PETSC_FALSE;
2950   PetscFunctionReturn(PETSC_SUCCESS);
2951 }
2952 
2953 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2954 {
2955   Mat         mat;
2956   Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;
2957 
2958   PetscFunctionBegin;
2959   *newmat = NULL;
2960   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2961   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2962   PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
2963   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2964   a = (Mat_MPIAIJ *)mat->data;
2965 
2966   mat->factortype   = matin->factortype;
2967   mat->assembled    = matin->assembled;
2968   mat->insertmode   = NOT_SET_VALUES;
2969   mat->preallocated = matin->preallocated;
2970 
2971   a->size         = oldmat->size;
2972   a->rank         = oldmat->rank;
2973   a->donotstash   = oldmat->donotstash;
2974   a->roworiented  = oldmat->roworiented;
2975   a->rowindices   = NULL;
2976   a->rowvalues    = NULL;
2977   a->getrowactive = PETSC_FALSE;
2978 
2979   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
2980   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2981 
2982   if (oldmat->colmap) {
2983 #if defined(PETSC_USE_CTABLE)
2984     PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
2985 #else
2986     PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
2987     PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
2988 #endif
2989   } else a->colmap = NULL;
2990   if (oldmat->garray) {
2991     PetscInt len;
2992     len = oldmat->B->cmap->n;
2993     PetscCall(PetscMalloc1(len + 1, &a->garray));
2994     if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
2995   } else a->garray = NULL;
2996 
2997   /* It may happen MatDuplicate is called with a non-assembled matrix
2998      In fact, MatDuplicate only requires the matrix to be preallocated
2999      This may happen inside a DMCreateMatrix_Shell */
3000   if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3001   if (oldmat->Mvctx) PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
3002   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3003   PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3004   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3005   *newmat = mat;
3006   PetscFunctionReturn(PETSC_SUCCESS);
3007 }
3008 
3009 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3010 {
3011   PetscBool isbinary, ishdf5;
3012 
3013   PetscFunctionBegin;
3014   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3015   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3016   /* force binary viewer to load .info file if it has not yet done so */
3017   PetscCall(PetscViewerSetUp(viewer));
3018   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3019   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3020   if (isbinary) {
3021     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3022   } else if (ishdf5) {
3023 #if defined(PETSC_HAVE_HDF5)
3024     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3025 #else
3026     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3027 #endif
3028   } else {
3029     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3030   }
3031   PetscFunctionReturn(PETSC_SUCCESS);
3032 }
3033 
3034 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3035 {
3036   PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
3037   PetscInt    *rowidxs, *colidxs;
3038   PetscScalar *matvals;
3039 
3040   PetscFunctionBegin;
3041   PetscCall(PetscViewerSetUp(viewer));
3042 
3043   /* read in matrix header */
3044   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3045   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3046   M  = header[1];
3047   N  = header[2];
3048   nz = header[3];
3049   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3050   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3051   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");
3052 
3053   /* set block sizes from the viewer's .info file */
3054   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3055   /* set global sizes if not set already */
3056   if (mat->rmap->N < 0) mat->rmap->N = M;
3057   if (mat->cmap->N < 0) mat->cmap->N = N;
3058   PetscCall(PetscLayoutSetUp(mat->rmap));
3059   PetscCall(PetscLayoutSetUp(mat->cmap));
3060 
3061   /* check if the matrix sizes are correct */
3062   PetscCall(MatGetSize(mat, &rows, &cols));
3063   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3064 
3065   /* read in row lengths and build row indices */
3066   PetscCall(MatGetLocalSize(mat, &m, NULL));
3067   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3068   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3069   rowidxs[0] = 0;
3070   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3071   if (nz != PETSC_MAX_INT) {
3072     PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3073     PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3074   }
3075 
3076   /* read in column indices and matrix values */
3077   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3078   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3079   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3080   /* store matrix indices and values */
3081   PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
3082   PetscCall(PetscFree(rowidxs));
3083   PetscCall(PetscFree2(colidxs, matvals));
3084   PetscFunctionReturn(PETSC_SUCCESS);
3085 }
3086 
3087 /* Not scalable because of ISAllGather() unless getting all columns. */
3088 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3089 {
3090   IS          iscol_local;
3091   PetscBool   isstride;
3092   PetscMPIInt lisstride = 0, gisstride;
3093 
3094   PetscFunctionBegin;
3095   /* check if we are grabbing all columns*/
3096   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3097 
3098   if (isstride) {
3099     PetscInt start, len, mstart, mlen;
3100     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3101     PetscCall(ISGetLocalSize(iscol, &len));
3102     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3103     if (mstart == start && mlen - mstart == len) lisstride = 1;
3104   }
3105 
3106   PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3107   if (gisstride) {
3108     PetscInt N;
3109     PetscCall(MatGetSize(mat, NULL, &N));
3110     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3111     PetscCall(ISSetIdentity(iscol_local));
3112     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3113   } else {
3114     PetscInt cbs;
3115     PetscCall(ISGetBlockSize(iscol, &cbs));
3116     PetscCall(ISAllGather(iscol, &iscol_local));
3117     PetscCall(ISSetBlockSize(iscol_local, cbs));
3118   }
3119 
3120   *isseq = iscol_local;
3121   PetscFunctionReturn(PETSC_SUCCESS);
3122 }
3123 
3124 /*
3125  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3126  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3127 
3128  Input Parameters:
3129 +   mat - matrix
3130 .   isrow - parallel row index set; its local indices are a subset of local columns of `mat`,
3131            i.e., mat->rstart <= isrow[i] < mat->rend
3132 -   iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
3133            i.e., mat->cstart <= iscol[i] < mat->cend
3134 
3135  Output Parameters:
3136 +   isrow_d - sequential row index set for retrieving mat->A
3137 .   iscol_d - sequential  column index set for retrieving mat->A
3138 .   iscol_o - sequential column index set for retrieving mat->B
3139 -   garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
3140  */
3141 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
3142 {
3143   Vec             x, cmap;
3144   const PetscInt *is_idx;
3145   PetscScalar    *xarray, *cmaparray;
3146   PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
3147   Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
3148   Mat             B    = a->B;
3149   Vec             lvec = a->lvec, lcmap;
3150   PetscInt        i, cstart, cend, Bn = B->cmap->N;
3151   MPI_Comm        comm;
3152   VecScatter      Mvctx = a->Mvctx;
3153 
3154   PetscFunctionBegin;
3155   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3156   PetscCall(ISGetLocalSize(iscol, &ncols));
3157 
3158   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3159   PetscCall(MatCreateVecs(mat, &x, NULL));
3160   PetscCall(VecSet(x, -1.0));
3161   PetscCall(VecDuplicate(x, &cmap));
3162   PetscCall(VecSet(cmap, -1.0));
3163 
3164   /* Get start indices */
3165   PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
3166   isstart -= ncols;
3167   PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));
3168 
3169   PetscCall(ISGetIndices(iscol, &is_idx));
3170   PetscCall(VecGetArray(x, &xarray));
3171   PetscCall(VecGetArray(cmap, &cmaparray));
3172   PetscCall(PetscMalloc1(ncols, &idx));
3173   for (i = 0; i < ncols; i++) {
3174     xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
3175     cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
3176     idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
3177   }
3178   PetscCall(VecRestoreArray(x, &xarray));
3179   PetscCall(VecRestoreArray(cmap, &cmaparray));
3180   PetscCall(ISRestoreIndices(iscol, &is_idx));
3181 
3182   /* Get iscol_d */
3183   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
3184   PetscCall(ISGetBlockSize(iscol, &i));
3185   PetscCall(ISSetBlockSize(*iscol_d, i));
3186 
3187   /* Get isrow_d */
3188   PetscCall(ISGetLocalSize(isrow, &m));
3189   rstart = mat->rmap->rstart;
3190   PetscCall(PetscMalloc1(m, &idx));
3191   PetscCall(ISGetIndices(isrow, &is_idx));
3192   for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
3193   PetscCall(ISRestoreIndices(isrow, &is_idx));
3194 
3195   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
3196   PetscCall(ISGetBlockSize(isrow, &i));
3197   PetscCall(ISSetBlockSize(*isrow_d, i));
3198 
3199   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3200   PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3201   PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3202 
3203   PetscCall(VecDuplicate(lvec, &lcmap));
3204 
3205   PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3206   PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3207 
3208   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3209   /* off-process column indices */
3210   count = 0;
3211   PetscCall(PetscMalloc1(Bn, &idx));
3212   PetscCall(PetscMalloc1(Bn, &cmap1));
3213 
3214   PetscCall(VecGetArray(lvec, &xarray));
3215   PetscCall(VecGetArray(lcmap, &cmaparray));
3216   for (i = 0; i < Bn; i++) {
3217     if (PetscRealPart(xarray[i]) > -1.0) {
3218       idx[count]   = i;                                     /* local column index in off-diagonal part B */
3219       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
3220       count++;
3221     }
3222   }
3223   PetscCall(VecRestoreArray(lvec, &xarray));
3224   PetscCall(VecRestoreArray(lcmap, &cmaparray));
3225 
3226   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
3227   /* cannot ensure iscol_o has same blocksize as iscol! */
3228 
3229   PetscCall(PetscFree(idx));
3230   *garray = cmap1;
3231 
3232   PetscCall(VecDestroy(&x));
3233   PetscCall(VecDestroy(&cmap));
3234   PetscCall(VecDestroy(&lcmap));
3235   PetscFunctionReturn(PETSC_SUCCESS);
3236 }
3237 
3238 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3239 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
3240 {
3241   Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
3242   Mat         M = NULL;
3243   MPI_Comm    comm;
3244   IS          iscol_d, isrow_d, iscol_o;
3245   Mat         Asub = NULL, Bsub = NULL;
3246   PetscInt    n;
3247 
3248   PetscFunctionBegin;
3249   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3250 
3251   if (call == MAT_REUSE_MATRIX) {
3252     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3253     PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
3254     PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");
3255 
3256     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
3257     PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");
3258 
3259     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
3260     PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");
3261 
3262     /* Update diagonal and off-diagonal portions of submat */
3263     asub = (Mat_MPIAIJ *)(*submat)->data;
3264     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
3265     PetscCall(ISGetLocalSize(iscol_o, &n));
3266     if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
3267     PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
3268     PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));
3269 
3270   } else { /* call == MAT_INITIAL_MATRIX) */
3271     const PetscInt *garray;
3272     PetscInt        BsubN;
3273 
3274     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3275     PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));
3276 
3277     /* Create local submatrices Asub and Bsub */
3278     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
3279     PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));
3280 
3281     /* Create submatrix M */
3282     PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));
3283 
3284     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3285     asub = (Mat_MPIAIJ *)M->data;
3286 
3287     PetscCall(ISGetLocalSize(iscol_o, &BsubN));
3288     n = asub->B->cmap->N;
3289     if (BsubN > n) {
3290       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3291       const PetscInt *idx;
3292       PetscInt        i, j, *idx_new, *subgarray = asub->garray;
3293       PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));
3294 
3295       PetscCall(PetscMalloc1(n, &idx_new));
3296       j = 0;
3297       PetscCall(ISGetIndices(iscol_o, &idx));
3298       for (i = 0; i < n; i++) {
3299         if (j >= BsubN) break;
3300         while (subgarray[i] > garray[j]) j++;
3301 
3302         if (subgarray[i] == garray[j]) {
3303           idx_new[i] = idx[j++];
3304         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
3305       }
3306       PetscCall(ISRestoreIndices(iscol_o, &idx));
3307 
3308       PetscCall(ISDestroy(&iscol_o));
3309       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));
3310 
3311     } else if (BsubN < n) {
3312       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
3313     }
3314 
3315     PetscCall(PetscFree(garray));
3316     *submat = M;
3317 
3318     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3319     PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
3320     PetscCall(ISDestroy(&isrow_d));
3321 
3322     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
3323     PetscCall(ISDestroy(&iscol_d));
3324 
3325     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
3326     PetscCall(ISDestroy(&iscol_o));
3327   }
3328   PetscFunctionReturn(PETSC_SUCCESS);
3329 }
3330 
3331 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
3332 {
3333   IS        iscol_local = NULL, isrow_d;
3334   PetscInt  csize;
3335   PetscInt  n, i, j, start, end;
3336   PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
3337   MPI_Comm  comm;
3338 
3339   PetscFunctionBegin;
3340   /* If isrow has same processor distribution as mat,
3341      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3342   if (call == MAT_REUSE_MATRIX) {
3343     PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
3344     if (isrow_d) {
3345       sameRowDist  = PETSC_TRUE;
3346       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3347     } else {
3348       PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
3349       if (iscol_local) {
3350         sameRowDist  = PETSC_TRUE;
3351         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3352       }
3353     }
3354   } else {
3355     /* Check if isrow has same processor distribution as mat */
3356     sameDist[0] = PETSC_FALSE;
3357     PetscCall(ISGetLocalSize(isrow, &n));
3358     if (!n) {
3359       sameDist[0] = PETSC_TRUE;
3360     } else {
3361       PetscCall(ISGetMinMax(isrow, &i, &j));
3362       PetscCall(MatGetOwnershipRange(mat, &start, &end));
3363       if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
3364     }
3365 
3366     /* Check if iscol has same processor distribution as mat */
3367     sameDist[1] = PETSC_FALSE;
3368     PetscCall(ISGetLocalSize(iscol, &n));
3369     if (!n) {
3370       sameDist[1] = PETSC_TRUE;
3371     } else {
3372       PetscCall(ISGetMinMax(iscol, &i, &j));
3373       PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
3374       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3375     }
3376 
3377     PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3378     PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
3379     sameRowDist = tsameDist[0];
3380   }
3381 
3382   if (sameRowDist) {
3383     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3384       /* isrow and iscol have same processor distribution as mat */
3385       PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
3386       PetscFunctionReturn(PETSC_SUCCESS);
3387     } else { /* sameRowDist */
3388       /* isrow has same processor distribution as mat */
3389       if (call == MAT_INITIAL_MATRIX) {
3390         PetscBool sorted;
3391         PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3392         PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
3393         PetscCall(ISGetSize(iscol, &i));
3394         PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);
3395 
3396         PetscCall(ISSorted(iscol_local, &sorted));
3397         if (sorted) {
3398           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3399           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
3400           PetscFunctionReturn(PETSC_SUCCESS);
3401         }
3402       } else { /* call == MAT_REUSE_MATRIX */
3403         IS iscol_sub;
3404         PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3405         if (iscol_sub) {
3406           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
3407           PetscFunctionReturn(PETSC_SUCCESS);
3408         }
3409       }
3410     }
3411   }
3412 
3413   /* General case: iscol -> iscol_local which has global size of iscol */
3414   if (call == MAT_REUSE_MATRIX) {
3415     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
3416     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3417   } else {
3418     if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3419   }
3420 
3421   PetscCall(ISGetLocalSize(iscol, &csize));
3422   PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));
3423 
3424   if (call == MAT_INITIAL_MATRIX) {
3425     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3426     PetscCall(ISDestroy(&iscol_local));
3427   }
3428   PetscFunctionReturn(PETSC_SUCCESS);
3429 }
3430 
3431 /*@C
3432   MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3433   and "off-diagonal" part of the matrix in CSR format.
3434 
3435   Collective
3436 
3437   Input Parameters:
3438 + comm   - MPI communicator
3439 . A      - "diagonal" portion of matrix
3440 . B      - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3441 - garray - global index of `B` columns
3442 
3443   Output Parameter:
3444 . mat - the matrix, with input `A` as its local diagonal matrix
3445 
3446   Level: advanced
3447 
3448   Notes:
3449   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3450 
3451   `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore.
3452 
3453 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3454 @*/
3455 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
3456 {
3457   Mat_MPIAIJ        *maij;
3458   Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
3459   PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
3460   const PetscScalar *oa;
3461   Mat                Bnew;
3462   PetscInt           m, n, N;
3463   MatType            mpi_mat_type;
3464 
3465   PetscFunctionBegin;
3466   PetscCall(MatCreate(comm, mat));
3467   PetscCall(MatGetSize(A, &m, &n));
3468   PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
3469   PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
3470   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3471   /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */
3472 
3473   /* Get global columns of mat */
3474   PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));
3475 
3476   PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
3477   /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
3478   PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
3479   PetscCall(MatSetType(*mat, mpi_mat_type));
3480 
3481   if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
3482   maij = (Mat_MPIAIJ *)(*mat)->data;
3483 
3484   (*mat)->preallocated = PETSC_TRUE;
3485 
3486   PetscCall(PetscLayoutSetUp((*mat)->rmap));
3487   PetscCall(PetscLayoutSetUp((*mat)->cmap));
3488 
3489   /* Set A as diagonal portion of *mat */
3490   maij->A = A;
3491 
3492   nz = oi[m];
3493   for (i = 0; i < nz; i++) {
3494     col   = oj[i];
3495     oj[i] = garray[col];
3496   }
3497 
3498   /* Set Bnew as off-diagonal portion of *mat */
3499   PetscCall(MatSeqAIJGetArrayRead(B, &oa));
3500   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
3501   PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
3502   bnew        = (Mat_SeqAIJ *)Bnew->data;
3503   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3504   maij->B     = Bnew;
3505 
3506   PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);
3507 
3508   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3509   b->free_a       = PETSC_FALSE;
3510   b->free_ij      = PETSC_FALSE;
3511   PetscCall(MatDestroy(&B));
3512 
3513   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3514   bnew->free_a       = PETSC_TRUE;
3515   bnew->free_ij      = PETSC_TRUE;
3516 
3517   /* condense columns of maij->B */
3518   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3519   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
3520   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
3521   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
3522   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3523   PetscFunctionReturn(PETSC_SUCCESS);
3524 }
3525 
3526 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3527 
3528 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
3529 {
3530   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
3531   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3532   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
3533   Mat             M, Msub, B = a->B;
3534   MatScalar      *aa;
3535   Mat_SeqAIJ     *aij;
3536   PetscInt       *garray = a->garray, *colsub, Ncols;
3537   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
3538   IS              iscol_sub, iscmap;
3539   const PetscInt *is_idx, *cmap;
3540   PetscBool       allcolumns = PETSC_FALSE;
3541   MPI_Comm        comm;
3542 
3543   PetscFunctionBegin;
3544   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3545   if (call == MAT_REUSE_MATRIX) {
3546     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3547     PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
3548     PetscCall(ISGetLocalSize(iscol_sub, &count));
3549 
3550     PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
3551     PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");
3552 
3553     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
3554     PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3555 
3556     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
3557 
3558   } else { /* call == MAT_INITIAL_MATRIX) */
3559     PetscBool flg;
3560 
3561     PetscCall(ISGetLocalSize(iscol, &n));
3562     PetscCall(ISGetSize(iscol, &Ncols));
3563 
3564     /* (1) iscol -> nonscalable iscol_local */
3565     /* Check for special case: each processor gets entire matrix columns */
3566     PetscCall(ISIdentity(iscol_local, &flg));
3567     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3568     PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3569     if (allcolumns) {
3570       iscol_sub = iscol_local;
3571       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3572       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
3573 
3574     } else {
3575       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3576       PetscInt *idx, *cmap1, k;
3577       PetscCall(PetscMalloc1(Ncols, &idx));
3578       PetscCall(PetscMalloc1(Ncols, &cmap1));
3579       PetscCall(ISGetIndices(iscol_local, &is_idx));
3580       count = 0;
3581       k     = 0;
3582       for (i = 0; i < Ncols; i++) {
3583         j = is_idx[i];
3584         if (j >= cstart && j < cend) {
3585           /* diagonal part of mat */
3586           idx[count]     = j;
3587           cmap1[count++] = i; /* column index in submat */
3588         } else if (Bn) {
3589           /* off-diagonal part of mat */
3590           if (j == garray[k]) {
3591             idx[count]     = j;
3592             cmap1[count++] = i; /* column index in submat */
3593           } else if (j > garray[k]) {
3594             while (j > garray[k] && k < Bn - 1) k++;
3595             if (j == garray[k]) {
3596               idx[count]     = j;
3597               cmap1[count++] = i; /* column index in submat */
3598             }
3599           }
3600         }
3601       }
3602       PetscCall(ISRestoreIndices(iscol_local, &is_idx));
3603 
3604       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
3605       PetscCall(ISGetBlockSize(iscol, &cbs));
3606       PetscCall(ISSetBlockSize(iscol_sub, cbs));
3607 
3608       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
3609     }
3610 
3611     /* (3) Create sequential Msub */
3612     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
3613   }
3614 
3615   PetscCall(ISGetLocalSize(iscol_sub, &count));
3616   aij = (Mat_SeqAIJ *)(Msub)->data;
3617   ii  = aij->i;
3618   PetscCall(ISGetIndices(iscmap, &cmap));
3619 
3620   /*
3621       m - number of local rows
3622       Ncols - number of columns (same on all processors)
3623       rstart - first row in new global matrix generated
3624   */
3625   PetscCall(MatGetSize(Msub, &m, NULL));
3626 
3627   if (call == MAT_INITIAL_MATRIX) {
3628     /* (4) Create parallel newmat */
3629     PetscMPIInt rank, size;
3630     PetscInt    csize;
3631 
3632     PetscCallMPI(MPI_Comm_size(comm, &size));
3633     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3634 
3635     /*
3636         Determine the number of non-zeros in the diagonal and off-diagonal
3637         portions of the matrix in order to do correct preallocation
3638     */
3639 
3640     /* first get start and end of "diagonal" columns */
3641     PetscCall(ISGetLocalSize(iscol, &csize));
3642     if (csize == PETSC_DECIDE) {
3643       PetscCall(ISGetSize(isrow, &mglobal));
3644       if (mglobal == Ncols) { /* square matrix */
3645         nlocal = m;
3646       } else {
3647         nlocal = Ncols / size + ((Ncols % size) > rank);
3648       }
3649     } else {
3650       nlocal = csize;
3651     }
3652     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3653     rstart = rend - nlocal;
3654     PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);
3655 
3656     /* next, compute all the lengths */
3657     jj = aij->j;
3658     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3659     olens = dlens + m;
3660     for (i = 0; i < m; i++) {
3661       jend = ii[i + 1] - ii[i];
3662       olen = 0;
3663       dlen = 0;
3664       for (j = 0; j < jend; j++) {
3665         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3666         else dlen++;
3667         jj++;
3668       }
3669       olens[i] = olen;
3670       dlens[i] = dlen;
3671     }
3672 
3673     PetscCall(ISGetBlockSize(isrow, &bs));
3674     PetscCall(ISGetBlockSize(iscol, &cbs));
3675 
3676     PetscCall(MatCreate(comm, &M));
3677     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
3678     PetscCall(MatSetBlockSizes(M, bs, cbs));
3679     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3680     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3681     PetscCall(PetscFree(dlens));
3682 
3683   } else { /* call == MAT_REUSE_MATRIX */
3684     M = *newmat;
3685     PetscCall(MatGetLocalSize(M, &i, NULL));
3686     PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3687     PetscCall(MatZeroEntries(M));
3688     /*
3689          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3690        rather than the slower MatSetValues().
3691     */
3692     M->was_assembled = PETSC_TRUE;
3693     M->assembled     = PETSC_FALSE;
3694   }
3695 
3696   /* (5) Set values of Msub to *newmat */
3697   PetscCall(PetscMalloc1(count, &colsub));
3698   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
3699 
3700   jj = aij->j;
3701   PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
3702   for (i = 0; i < m; i++) {
3703     row = rstart + i;
3704     nz  = ii[i + 1] - ii[i];
3705     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
3706     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
3707     jj += nz;
3708     aa += nz;
3709   }
3710   PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
3711   PetscCall(ISRestoreIndices(iscmap, &cmap));
3712 
3713   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3714   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3715 
3716   PetscCall(PetscFree(colsub));
3717 
3718   /* save Msub, iscol_sub and iscmap used in processor for next request */
3719   if (call == MAT_INITIAL_MATRIX) {
3720     *newmat = M;
3721     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub));
3722     PetscCall(MatDestroy(&Msub));
3723 
3724     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub));
3725     PetscCall(ISDestroy(&iscol_sub));
3726 
3727     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap));
3728     PetscCall(ISDestroy(&iscmap));
3729 
3730     if (iscol_local) {
3731       PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local));
3732       PetscCall(ISDestroy(&iscol_local));
3733     }
3734   }
3735   PetscFunctionReturn(PETSC_SUCCESS);
3736 }
3737 
3738 /*
3739     Not great since it makes two copies of the submatrix, first an SeqAIJ
3740   in local and then by concatenating the local matrices the end result.
3741   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3742 
3743   This requires a sequential iscol with all indices.
3744 */
3745 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
3746 {
3747   PetscMPIInt rank, size;
3748   PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
3749   PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3750   Mat         M, Mreuse;
3751   MatScalar  *aa, *vwork;
3752   MPI_Comm    comm;
3753   Mat_SeqAIJ *aij;
3754   PetscBool   colflag, allcolumns = PETSC_FALSE;
3755 
3756   PetscFunctionBegin;
3757   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3758   PetscCallMPI(MPI_Comm_rank(comm, &rank));
3759   PetscCallMPI(MPI_Comm_size(comm, &size));
3760 
3761   /* Check for special case: each processor gets entire matrix columns */
3762   PetscCall(ISIdentity(iscol, &colflag));
3763   PetscCall(ISGetLocalSize(iscol, &n));
3764   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3765   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3766 
3767   if (call == MAT_REUSE_MATRIX) {
3768     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
3769     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3770     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
3771   } else {
3772     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
3773   }
3774 
3775   /*
3776       m - number of local rows
3777       n - number of columns (same on all processors)
3778       rstart - first row in new global matrix generated
3779   */
3780   PetscCall(MatGetSize(Mreuse, &m, &n));
3781   PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
3782   if (call == MAT_INITIAL_MATRIX) {
3783     aij = (Mat_SeqAIJ *)(Mreuse)->data;
3784     ii  = aij->i;
3785     jj  = aij->j;
3786 
3787     /*
3788         Determine the number of non-zeros in the diagonal and off-diagonal
3789         portions of the matrix in order to do correct preallocation
3790     */
3791 
3792     /* first get start and end of "diagonal" columns */
3793     if (csize == PETSC_DECIDE) {
3794       PetscCall(ISGetSize(isrow, &mglobal));
3795       if (mglobal == n) { /* square matrix */
3796         nlocal = m;
3797       } else {
3798         nlocal = n / size + ((n % size) > rank);
3799       }
3800     } else {
3801       nlocal = csize;
3802     }
3803     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3804     rstart = rend - nlocal;
3805     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
3806 
3807     /* next, compute all the lengths */
3808     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3809     olens = dlens + m;
3810     for (i = 0; i < m; i++) {
3811       jend = ii[i + 1] - ii[i];
3812       olen = 0;
3813       dlen = 0;
3814       for (j = 0; j < jend; j++) {
3815         if (*jj < rstart || *jj >= rend) olen++;
3816         else dlen++;
3817         jj++;
3818       }
3819       olens[i] = olen;
3820       dlens[i] = dlen;
3821     }
3822     PetscCall(MatCreate(comm, &M));
3823     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
3824     PetscCall(MatSetBlockSizes(M, bs, cbs));
3825     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3826     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3827     PetscCall(PetscFree(dlens));
3828   } else {
3829     PetscInt ml, nl;
3830 
3831     M = *newmat;
3832     PetscCall(MatGetLocalSize(M, &ml, &nl));
3833     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3834     PetscCall(MatZeroEntries(M));
3835     /*
3836          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3837        rather than the slower MatSetValues().
3838     */
3839     M->was_assembled = PETSC_TRUE;
3840     M->assembled     = PETSC_FALSE;
3841   }
3842   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
3843   aij = (Mat_SeqAIJ *)(Mreuse)->data;
3844   ii  = aij->i;
3845   jj  = aij->j;
3846 
3847   /* trigger copy to CPU if needed */
3848   PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
3849   for (i = 0; i < m; i++) {
3850     row   = rstart + i;
3851     nz    = ii[i + 1] - ii[i];
3852     cwork = jj;
3853     jj += nz;
3854     vwork = aa;
3855     aa += nz;
3856     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
3857   }
3858   PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));
3859 
3860   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3861   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3862   *newmat = M;
3863 
3864   /* save submatrix used in processor for next request */
3865   if (call == MAT_INITIAL_MATRIX) {
3866     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
3867     PetscCall(MatDestroy(&Mreuse));
3868   }
3869   PetscFunctionReturn(PETSC_SUCCESS);
3870 }
3871 
3872 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3873 {
3874   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3875   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
3876   const PetscInt *JJ;
3877   PetscBool       nooffprocentries;
3878   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3879 
3880   PetscFunctionBegin;
3881   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);
3882 
3883   PetscCall(PetscLayoutSetUp(B->rmap));
3884   PetscCall(PetscLayoutSetUp(B->cmap));
3885   m      = B->rmap->n;
3886   cstart = B->cmap->rstart;
3887   cend   = B->cmap->rend;
3888   rstart = B->rmap->rstart;
3889 
3890   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3891 
3892   if (PetscDefined(USE_DEBUG)) {
3893     for (i = 0; i < m; i++) {
3894       nnz = Ii[i + 1] - Ii[i];
3895       JJ  = J ? J + Ii[i] : NULL;
3896       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3897       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3898       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3899     }
3900   }
3901 
3902   for (i = 0; i < m; i++) {
3903     nnz     = Ii[i + 1] - Ii[i];
3904     JJ      = J ? J + Ii[i] : NULL;
3905     nnz_max = PetscMax(nnz_max, nnz);
3906     d       = 0;
3907     for (j = 0; j < nnz; j++) {
3908       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3909     }
3910     d_nnz[i] = d;
3911     o_nnz[i] = nnz - d;
3912   }
3913   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3914   PetscCall(PetscFree2(d_nnz, o_nnz));
3915 
3916   for (i = 0; i < m; i++) {
3917     ii = i + rstart;
3918     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J ? J + Ii[i] : NULL, v ? v + Ii[i] : NULL, INSERT_VALUES));
3919   }
3920   nooffprocentries    = B->nooffprocentries;
3921   B->nooffprocentries = PETSC_TRUE;
3922   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3923   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3924   B->nooffprocentries = nooffprocentries;
3925 
3926   /* count number of entries below block diagonal */
3927   PetscCall(PetscFree(Aij->ld));
3928   PetscCall(PetscCalloc1(m, &ld));
3929   Aij->ld = ld;
3930   for (i = 0; i < m; i++) {
3931     nnz = Ii[i + 1] - Ii[i];
3932     j   = 0;
3933     while (j < nnz && J[j] < cstart) j++;
3934     ld[i] = j;
3935     if (J) J += nnz;
3936   }
3937 
3938   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3939   PetscFunctionReturn(PETSC_SUCCESS);
3940 }
3941 
3942 /*@
3943   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3944   (the default parallel PETSc format).
3945 
3946   Collective
3947 
3948   Input Parameters:
3949 + B - the matrix
3950 . i - the indices into j for the start of each local row (starts with zero)
3951 . j - the column indices for each local row (starts with zero)
3952 - v - optional values in the matrix
3953 
3954   Level: developer
3955 
3956   Notes:
3957   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
3958   thus you CANNOT change the matrix entries by changing the values of `v` after you have
3959   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3960 
3961   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
3962 
3963   The format which is used for the sparse matrix input, is equivalent to a
3964   row-major ordering.. i.e for the following matrix, the input data expected is
3965   as shown
3966 
3967 .vb
3968         1 0 0
3969         2 0 3     P0
3970        -------
3971         4 5 6     P1
3972 
3973      Process0 [P0] rows_owned=[0,1]
3974         i =  {0,1,3}  [size = nrow+1  = 2+1]
3975         j =  {0,0,2}  [size = 3]
3976         v =  {1,2,3}  [size = 3]
3977 
3978      Process1 [P1] rows_owned=[2]
3979         i =  {0,3}    [size = nrow+1  = 1+1]
3980         j =  {0,1,2}  [size = 3]
3981         v =  {4,5,6}  [size = 3]
3982 .ve
3983 
3984 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
3985           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
3986 @*/
3987 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
3988 {
3989   PetscFunctionBegin;
3990   PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
3991   PetscFunctionReturn(PETSC_SUCCESS);
3992 }
3993 
3994 /*@C
3995   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
3996   (the default parallel PETSc format).  For good matrix assembly performance
3997   the user should preallocate the matrix storage by setting the parameters
3998   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
3999 
4000   Collective
4001 
4002   Input Parameters:
4003 + B     - the matrix
4004 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4005            (same value is used for all local rows)
4006 . d_nnz - array containing the number of nonzeros in the various rows of the
4007            DIAGONAL portion of the local submatrix (possibly different for each row)
4008            or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4009            The size of this array is equal to the number of local rows, i.e 'm'.
4010            For matrices that will be factored, you must leave room for (and set)
4011            the diagonal entry even if it is zero.
4012 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4013            submatrix (same value is used for all local rows).
4014 - o_nnz - array containing the number of nonzeros in the various rows of the
4015            OFF-DIAGONAL portion of the local submatrix (possibly different for
4016            each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4017            structure. The size of this array is equal to the number
4018            of local rows, i.e 'm'.
4019 
4020   Example Usage:
4021   Consider the following 8x8 matrix with 34 non-zero values, that is
4022   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4023   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4024   as follows
4025 
4026 .vb
4027             1  2  0  |  0  3  0  |  0  4
4028     Proc0   0  5  6  |  7  0  0  |  8  0
4029             9  0 10  | 11  0  0  | 12  0
4030     -------------------------------------
4031            13  0 14  | 15 16 17  |  0  0
4032     Proc1   0 18  0  | 19 20 21  |  0  0
4033             0  0  0  | 22 23  0  | 24  0
4034     -------------------------------------
4035     Proc2  25 26 27  |  0  0 28  | 29  0
4036            30  0  0  | 31 32 33  |  0 34
4037 .ve
4038 
4039   This can be represented as a collection of submatrices as
4040 .vb
4041       A B C
4042       D E F
4043       G H I
4044 .ve
4045 
4046   Where the submatrices A,B,C are owned by proc0, D,E,F are
4047   owned by proc1, G,H,I are owned by proc2.
4048 
4049   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4050   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4051   The 'M','N' parameters are 8,8, and have the same values on all procs.
4052 
4053   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4054   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4055   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4056   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4057   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4058   matrix, ans [DF] as another `MATSEQAIJ` matrix.
4059 
4060   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4061   allocated for every row of the local diagonal submatrix, and `o_nz`
4062   storage locations are allocated for every row of the OFF-DIAGONAL submat.
4063   One way to choose `d_nz` and `o_nz` is to use the max nonzerors per local
4064   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4065   In this case, the values of `d_nz`, `o_nz` are
4066 .vb
4067      proc0  dnz = 2, o_nz = 2
4068      proc1  dnz = 3, o_nz = 2
4069      proc2  dnz = 1, o_nz = 4
4070 .ve
4071   We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
4072   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4073   for proc3. i.e we are using 12+15+10=37 storage locations to store
4074   34 values.
4075 
4076   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4077   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4078   In the above case the values for `d_nnz`, `o_nnz` are
4079 .vb
4080      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4081      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4082      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4083 .ve
4084   Here the space allocated is sum of all the above values i.e 34, and
4085   hence pre-allocation is perfect.
4086 
4087   Level: intermediate
4088 
4089   Notes:
4090   If the *_nnz parameter is given then the *_nz parameter is ignored
4091 
4092   The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4093   storage.  The stored row and column indices begin with zero.
4094   See [Sparse Matrices](sec_matsparse) for details.
4095 
4096   The parallel matrix is partitioned such that the first m0 rows belong to
4097   process 0, the next m1 rows belong to process 1, the next m2 rows belong
4098   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4099 
4100   The DIAGONAL portion of the local submatrix of a processor can be defined
4101   as the submatrix which is obtained by extraction the part corresponding to
4102   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4103   first row that belongs to the processor, r2 is the last row belonging to
4104   the this processor, and c1-c2 is range of indices of the local part of a
4105   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4106   common case of a square matrix, the row and column ranges are the same and
4107   the DIAGONAL part is also square. The remaining portion of the local
4108   submatrix (mxN) constitute the OFF-DIAGONAL portion.
4109 
4110   If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4111 
4112   You can call `MatGetInfo()` to get information on how effective the preallocation was;
4113   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4114   You can also run with the option `-info` and look for messages with the string
4115   malloc in them to see if additional memory allocation was needed.
4116 
4117 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4118           `MatGetInfo()`, `PetscSplitOwnership()`
4119 @*/
4120 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
4121 {
4122   PetscFunctionBegin;
4123   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
4124   PetscValidType(B, 1);
4125   PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
4126   PetscFunctionReturn(PETSC_SUCCESS);
4127 }
4128 
4129 /*@
4130   MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard
4131   CSR format for the local rows.
4132 
4133   Collective
4134 
4135   Input Parameters:
4136 + comm - MPI communicator
4137 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
4138 . n    - This value should be the same as the local size used in creating the
4139        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4140        calculated if N is given) For square matrices n is almost always m.
4141 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4142 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4143 . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4144 . j    - column indices
4145 - a    - optional matrix values
4146 
4147   Output Parameter:
4148 . mat - the matrix
4149 
4150   Level: intermediate
4151 
4152   Notes:
4153   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4154   thus you CANNOT change the matrix entries by changing the values of a[] after you have
4155   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4156 
4157   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
4158 
4159   The format which is used for the sparse matrix input, is equivalent to a
4160   row-major ordering.. i.e for the following matrix, the input data expected is
4161   as shown
4162 
4163   Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
4164 .vb
4165         1 0 0
4166         2 0 3     P0
4167        -------
4168         4 5 6     P1
4169 
4170      Process0 [P0] rows_owned=[0,1]
4171         i =  {0,1,3}  [size = nrow+1  = 2+1]
4172         j =  {0,0,2}  [size = 3]
4173         v =  {1,2,3}  [size = 3]
4174 
4175      Process1 [P1] rows_owned=[2]
4176         i =  {0,3}    [size = nrow+1  = 1+1]
4177         j =  {0,1,2}  [size = 3]
4178         v =  {4,5,6}  [size = 3]
4179 .ve
4180 
4181 .seealso: [](ch_matrices), `Mat`, `MATMPIAIK`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4182           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4183 @*/
4184 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
4185 {
4186   PetscFunctionBegin;
4187   PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4188   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4189   PetscCall(MatCreate(comm, mat));
4190   PetscCall(MatSetSizes(*mat, m, n, M, N));
4191   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4192   PetscCall(MatSetType(*mat, MATMPIAIJ));
4193   PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
4194   PetscFunctionReturn(PETSC_SUCCESS);
4195 }
4196 
4197 /*@
4198   MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard
4199   CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed
4200   from `MatCreateMPIAIJWithArrays()`
4201 
4202   Deprecated: Use `MatUpdateMPIAIJWithArray()`
4203 
4204   Collective
4205 
4206   Input Parameters:
4207 + mat - the matrix
4208 . m   - number of local rows (Cannot be `PETSC_DECIDE`)
4209 . n   - This value should be the same as the local size used in creating the
4210        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4211        calculated if N is given) For square matrices n is almost always m.
4212 . M   - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4213 . N   - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4214 . Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4215 . J   - column indices
4216 - v   - matrix values
4217 
4218   Level: deprecated
4219 
4220 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4221           `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`
4222 @*/
4223 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
4224 {
4225   PetscInt        nnz, i;
4226   PetscBool       nooffprocentries;
4227   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4228   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4229   PetscScalar    *ad, *ao;
4230   PetscInt        ldi, Iii, md;
4231   const PetscInt *Adi = Ad->i;
4232   PetscInt       *ld  = Aij->ld;
4233 
4234   PetscFunctionBegin;
4235   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4236   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4237   PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4238   PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4239 
4240   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4241   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4242 
4243   for (i = 0; i < m; i++) {
4244     nnz = Ii[i + 1] - Ii[i];
4245     Iii = Ii[i];
4246     ldi = ld[i];
4247     md  = Adi[i + 1] - Adi[i];
4248     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4249     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4250     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4251     ad += md;
4252     ao += nnz - md;
4253   }
4254   nooffprocentries      = mat->nooffprocentries;
4255   mat->nooffprocentries = PETSC_TRUE;
4256   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4257   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4258   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4259   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4260   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4261   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4262   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4263   mat->nooffprocentries = nooffprocentries;
4264   PetscFunctionReturn(PETSC_SUCCESS);
4265 }
4266 
4267 /*@
4268   MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values
4269 
4270   Collective
4271 
4272   Input Parameters:
4273 + mat - the matrix
4274 - v   - matrix values, stored by row
4275 
4276   Level: intermediate
4277 
4278   Note:
4279   The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4280 
4281 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4282           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4283 @*/
4284 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4285 {
4286   PetscInt        nnz, i, m;
4287   PetscBool       nooffprocentries;
4288   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4289   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4290   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4291   PetscScalar    *ad, *ao;
4292   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4293   PetscInt        ldi, Iii, md;
4294   PetscInt       *ld = Aij->ld;
4295 
4296   PetscFunctionBegin;
4297   m = mat->rmap->n;
4298 
4299   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4300   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4301   Iii = 0;
4302   for (i = 0; i < m; i++) {
4303     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4304     ldi = ld[i];
4305     md  = Adi[i + 1] - Adi[i];
4306     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4307     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4308     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4309     ad += md;
4310     ao += nnz - md;
4311     Iii += nnz;
4312   }
4313   nooffprocentries      = mat->nooffprocentries;
4314   mat->nooffprocentries = PETSC_TRUE;
4315   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4316   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4317   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4318   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4319   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4320   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4321   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4322   mat->nooffprocentries = nooffprocentries;
4323   PetscFunctionReturn(PETSC_SUCCESS);
4324 }
4325 
4326 /*@C
4327   MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4328   (the default parallel PETSc format).  For good matrix assembly performance
4329   the user should preallocate the matrix storage by setting the parameters
4330   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4331 
4332   Collective
4333 
4334   Input Parameters:
4335 + comm  - MPI communicator
4336 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4337            This value should be the same as the local size used in creating the
4338            y vector for the matrix-vector product y = Ax.
4339 . n     - This value should be the same as the local size used in creating the
4340        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4341        calculated if N is given) For square matrices n is almost always m.
4342 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4343 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4344 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4345            (same value is used for all local rows)
4346 . d_nnz - array containing the number of nonzeros in the various rows of the
4347            DIAGONAL portion of the local submatrix (possibly different for each row)
4348            or `NULL`, if `d_nz` is used to specify the nonzero structure.
4349            The size of this array is equal to the number of local rows, i.e 'm'.
4350 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4351            submatrix (same value is used for all local rows).
4352 - o_nnz - array containing the number of nonzeros in the various rows of the
4353            OFF-DIAGONAL portion of the local submatrix (possibly different for
4354            each row) or `NULL`, if `o_nz` is used to specify the nonzero
4355            structure. The size of this array is equal to the number
4356            of local rows, i.e 'm'.
4357 
4358   Output Parameter:
4359 . A - the matrix
4360 
4361   Options Database Keys:
4362 + -mat_no_inode                     - Do not use inodes
4363 . -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
4364 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4365         See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
4366         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4367 
4368   Level: intermediate
4369 
4370   Notes:
4371   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4372   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4373   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4374 
4375   If the *_nnz parameter is given then the *_nz parameter is ignored
4376 
4377   The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4378   processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4379   storage requirements for this matrix.
4380 
4381   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one
4382   processor than it must be used on all processors that share the object for
4383   that argument.
4384 
4385   The user MUST specify either the local or global matrix dimensions
4386   (possibly both).
4387 
4388   The parallel matrix is partitioned across processors such that the
4389   first m0 rows belong to process 0, the next m1 rows belong to
4390   process 1, the next m2 rows belong to process 2 etc.. where
4391   m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4392   values corresponding to [m x N] submatrix.
4393 
4394   The columns are logically partitioned with the n0 columns belonging
4395   to 0th partition, the next n1 columns belonging to the next
4396   partition etc.. where n0,n1,n2... are the input parameter 'n'.
4397 
4398   The DIAGONAL portion of the local submatrix on any given processor
4399   is the submatrix corresponding to the rows and columns m,n
4400   corresponding to the given processor. i.e diagonal matrix on
4401   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4402   etc. The remaining portion of the local submatrix [m x (N-n)]
4403   constitute the OFF-DIAGONAL portion. The example below better
4404   illustrates this concept.
4405 
4406   For a square global matrix we define each processor's diagonal portion
4407   to be its local rows and the corresponding columns (a square submatrix);
4408   each processor's off-diagonal portion encompasses the remainder of the
4409   local matrix (a rectangular submatrix).
4410 
4411   If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
4412 
4413   When calling this routine with a single process communicator, a matrix of
4414   type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4415   type of communicator, use the construction mechanism
4416 .vb
4417   MatCreate(..., &A);
4418   MatSetType(A, MATMPIAIJ);
4419   MatSetSizes(A, m, n, M, N);
4420   MatMPIAIJSetPreallocation(A, ...);
4421 .ve
4422 
4423   By default, this format uses inodes (identical nodes) when possible.
4424   We search for consecutive rows with the same nonzero structure, thereby
4425   reusing matrix information to achieve increased efficiency.
4426 
4427   Example Usage:
4428   Consider the following 8x8 matrix with 34 non-zero values, that is
4429   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4430   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4431   as follows
4432 
4433 .vb
4434             1  2  0  |  0  3  0  |  0  4
4435     Proc0   0  5  6  |  7  0  0  |  8  0
4436             9  0 10  | 11  0  0  | 12  0
4437     -------------------------------------
4438            13  0 14  | 15 16 17  |  0  0
4439     Proc1   0 18  0  | 19 20 21  |  0  0
4440             0  0  0  | 22 23  0  | 24  0
4441     -------------------------------------
4442     Proc2  25 26 27  |  0  0 28  | 29  0
4443            30  0  0  | 31 32 33  |  0 34
4444 .ve
4445 
4446   This can be represented as a collection of submatrices as
4447 
4448 .vb
4449       A B C
4450       D E F
4451       G H I
4452 .ve
4453 
4454   Where the submatrices A,B,C are owned by proc0, D,E,F are
4455   owned by proc1, G,H,I are owned by proc2.
4456 
4457   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4458   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4459   The 'M','N' parameters are 8,8, and have the same values on all procs.
4460 
4461   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4462   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4463   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4464   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4465   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4466   matrix, ans [DF] as another SeqAIJ matrix.
4467 
4468   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4469   allocated for every row of the local diagonal submatrix, and `o_nz`
4470   storage locations are allocated for every row of the OFF-DIAGONAL submat.
4471   One way to choose `d_nz` and `o_nz` is to use the max nonzerors per local
4472   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4473   In this case, the values of `d_nz`,`o_nz` are
4474 .vb
4475      proc0  dnz = 2, o_nz = 2
4476      proc1  dnz = 3, o_nz = 2
4477      proc2  dnz = 1, o_nz = 4
4478 .ve
4479   We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
4480   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4481   for proc3. i.e we are using 12+15+10=37 storage locations to store
4482   34 values.
4483 
4484   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4485   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4486   In the above case the values for d_nnz,o_nnz are
4487 .vb
4488      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4489      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4490      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4491 .ve
4492   Here the space allocated is sum of all the above values i.e 34, and
4493   hence pre-allocation is perfect.
4494 
4495 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4496           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
4497 @*/
4498 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4499 {
4500   PetscMPIInt size;
4501 
4502   PetscFunctionBegin;
4503   PetscCall(MatCreate(comm, A));
4504   PetscCall(MatSetSizes(*A, m, n, M, N));
4505   PetscCallMPI(MPI_Comm_size(comm, &size));
4506   if (size > 1) {
4507     PetscCall(MatSetType(*A, MATMPIAIJ));
4508     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4509   } else {
4510     PetscCall(MatSetType(*A, MATSEQAIJ));
4511     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4512   }
4513   PetscFunctionReturn(PETSC_SUCCESS);
4514 }
4515 
4516 /*MC
4517     MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix
4518 
4519     Synopsis:
4520     MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4521 
4522     Not Collective
4523 
4524     Input Parameter:
4525 .   A - the `MATMPIAIJ` matrix
4526 
4527     Output Parameters:
4528 +   Ad - the diagonal portion of the matrix
4529 .   Ao - the off diagonal portion of the matrix
4530 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4531 -   ierr - error code
4532 
4533      Level: advanced
4534 
4535     Note:
4536     Use  `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4537 
4538 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
4539 M*/
4540 
4541 /*MC
4542     MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4543 
4544     Synopsis:
4545     MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4546 
4547     Not Collective
4548 
4549     Input Parameters:
4550 +   A - the `MATMPIAIJ` matrix
4551 .   Ad - the diagonal portion of the matrix
4552 .   Ao - the off diagonal portion of the matrix
4553 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4554 -   ierr - error code
4555 
4556      Level: advanced
4557 
4558 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
4559 M*/
4560 
4561 /*@C
4562   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4563 
4564   Not Collective
4565 
4566   Input Parameter:
4567 . A - The `MATMPIAIJ` matrix
4568 
4569   Output Parameters:
4570 + Ad     - The local diagonal block as a `MATSEQAIJ` matrix
4571 . Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
4572 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4573 
4574   Level: intermediate
4575 
4576   Note:
4577   The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
4578   in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns are `Ao` are in [0, Nco), where Nco is
4579   the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
4580   local column numbers to global column numbers in the original matrix.
4581 
4582   Fortran Notes:
4583   `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()`
4584 
4585 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
4586 @*/
4587 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4588 {
4589   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4590   PetscBool   flg;
4591 
4592   PetscFunctionBegin;
4593   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4594   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4595   if (Ad) *Ad = a->A;
4596   if (Ao) *Ao = a->B;
4597   if (colmap) *colmap = a->garray;
4598   PetscFunctionReturn(PETSC_SUCCESS);
4599 }
4600 
4601 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
4602 {
4603   PetscInt     m, N, i, rstart, nnz, Ii;
4604   PetscInt    *indx;
4605   PetscScalar *values;
4606   MatType      rootType;
4607 
4608   PetscFunctionBegin;
4609   PetscCall(MatGetSize(inmat, &m, &N));
4610   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4611     PetscInt *dnz, *onz, sum, bs, cbs;
4612 
4613     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
4614     /* Check sum(n) = N */
4615     PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
4616     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
4617 
4618     PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
4619     rstart -= m;
4620 
4621     MatPreallocateBegin(comm, m, n, dnz, onz);
4622     for (i = 0; i < m; i++) {
4623       PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4624       PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
4625       PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4626     }
4627 
4628     PetscCall(MatCreate(comm, outmat));
4629     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
4630     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
4631     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
4632     PetscCall(MatGetRootType_Private(inmat, &rootType));
4633     PetscCall(MatSetType(*outmat, rootType));
4634     PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
4635     PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
4636     MatPreallocateEnd(dnz, onz);
4637     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
4638   }
4639 
4640   /* numeric phase */
4641   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
4642   for (i = 0; i < m; i++) {
4643     PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4644     Ii = i + rstart;
4645     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
4646     PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4647   }
4648   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
4649   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
4650   PetscFunctionReturn(PETSC_SUCCESS);
4651 }
4652 
4653 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4654 {
4655   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4656 
4657   PetscFunctionBegin;
4658   if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
4659   PetscCall(PetscFree(merge->id_r));
4660   PetscCall(PetscFree(merge->len_s));
4661   PetscCall(PetscFree(merge->len_r));
4662   PetscCall(PetscFree(merge->bi));
4663   PetscCall(PetscFree(merge->bj));
4664   PetscCall(PetscFree(merge->buf_ri[0]));
4665   PetscCall(PetscFree(merge->buf_ri));
4666   PetscCall(PetscFree(merge->buf_rj[0]));
4667   PetscCall(PetscFree(merge->buf_rj));
4668   PetscCall(PetscFree(merge->coi));
4669   PetscCall(PetscFree(merge->coj));
4670   PetscCall(PetscFree(merge->owners_co));
4671   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4672   PetscCall(PetscFree(merge));
4673   PetscFunctionReturn(PETSC_SUCCESS);
4674 }
4675 
4676 #include <../src/mat/utils/freespace.h>
4677 #include <petscbt.h>
4678 
4679 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
4680 {
4681   MPI_Comm             comm;
4682   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4683   PetscMPIInt          size, rank, taga, *len_s;
4684   PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj;
4685   PetscInt             proc, m;
4686   PetscInt           **buf_ri, **buf_rj;
4687   PetscInt             k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
4688   PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
4689   MPI_Request         *s_waits, *r_waits;
4690   MPI_Status          *status;
4691   const MatScalar     *aa, *a_a;
4692   MatScalar          **abuf_r, *ba_i;
4693   Mat_Merge_SeqsToMPI *merge;
4694   PetscContainer       container;
4695 
4696   PetscFunctionBegin;
4697   PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
4698   PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));
4699 
4700   PetscCallMPI(MPI_Comm_size(comm, &size));
4701   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4702 
4703   PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
4704   PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4705   PetscCall(PetscContainerGetPointer(container, (void **)&merge));
4706   PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
4707   aa = a_a;
4708 
4709   bi     = merge->bi;
4710   bj     = merge->bj;
4711   buf_ri = merge->buf_ri;
4712   buf_rj = merge->buf_rj;
4713 
4714   PetscCall(PetscMalloc1(size, &status));
4715   owners = merge->rowmap->range;
4716   len_s  = merge->len_s;
4717 
4718   /* send and recv matrix values */
4719   PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
4720   PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));
4721 
4722   PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
4723   for (proc = 0, k = 0; proc < size; proc++) {
4724     if (!len_s[proc]) continue;
4725     i = owners[proc];
4726     PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
4727     k++;
4728   }
4729 
4730   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
4731   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
4732   PetscCall(PetscFree(status));
4733 
4734   PetscCall(PetscFree(s_waits));
4735   PetscCall(PetscFree(r_waits));
4736 
4737   /* insert mat values of mpimat */
4738   PetscCall(PetscMalloc1(N, &ba_i));
4739   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4740 
4741   for (k = 0; k < merge->nrecv; k++) {
4742     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4743     nrows       = *(buf_ri_k[k]);
4744     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4745     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4746   }
4747 
4748   /* set values of ba */
4749   m = merge->rowmap->n;
4750   for (i = 0; i < m; i++) {
4751     arow = owners[rank] + i;
4752     bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
4753     bnzi = bi[i + 1] - bi[i];
4754     PetscCall(PetscArrayzero(ba_i, bnzi));
4755 
4756     /* add local non-zero vals of this proc's seqmat into ba */
4757     anzi   = ai[arow + 1] - ai[arow];
4758     aj     = a->j + ai[arow];
4759     aa     = a_a + ai[arow];
4760     nextaj = 0;
4761     for (j = 0; nextaj < anzi; j++) {
4762       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4763         ba_i[j] += aa[nextaj++];
4764       }
4765     }
4766 
4767     /* add received vals into ba */
4768     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4769       /* i-th row */
4770       if (i == *nextrow[k]) {
4771         anzi   = *(nextai[k] + 1) - *nextai[k];
4772         aj     = buf_rj[k] + *(nextai[k]);
4773         aa     = abuf_r[k] + *(nextai[k]);
4774         nextaj = 0;
4775         for (j = 0; nextaj < anzi; j++) {
4776           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4777             ba_i[j] += aa[nextaj++];
4778           }
4779         }
4780         nextrow[k]++;
4781         nextai[k]++;
4782       }
4783     }
4784     PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
4785   }
4786   PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
4787   PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
4788   PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));
4789 
4790   PetscCall(PetscFree(abuf_r[0]));
4791   PetscCall(PetscFree(abuf_r));
4792   PetscCall(PetscFree(ba_i));
4793   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
4794   PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
4795   PetscFunctionReturn(PETSC_SUCCESS);
4796 }
4797 
4798 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4799 {
4800   Mat                  B_mpi;
4801   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4802   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4803   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4804   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4805   PetscInt             len, proc, *dnz, *onz, bs, cbs;
4806   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4807   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4808   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4809   MPI_Status          *status;
4810   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4811   PetscBT              lnkbt;
4812   Mat_Merge_SeqsToMPI *merge;
4813   PetscContainer       container;
4814 
4815   PetscFunctionBegin;
4816   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4817 
4818   /* make sure it is a PETSc comm */
4819   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4820   PetscCallMPI(MPI_Comm_size(comm, &size));
4821   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4822 
4823   PetscCall(PetscNew(&merge));
4824   PetscCall(PetscMalloc1(size, &status));
4825 
4826   /* determine row ownership */
4827   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4828   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4829   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4830   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4831   PetscCall(PetscLayoutSetUp(merge->rowmap));
4832   PetscCall(PetscMalloc1(size, &len_si));
4833   PetscCall(PetscMalloc1(size, &merge->len_s));
4834 
4835   m      = merge->rowmap->n;
4836   owners = merge->rowmap->range;
4837 
4838   /* determine the number of messages to send, their lengths */
4839   len_s = merge->len_s;
4840 
4841   len          = 0; /* length of buf_si[] */
4842   merge->nsend = 0;
4843   for (proc = 0; proc < size; proc++) {
4844     len_si[proc] = 0;
4845     if (proc == rank) {
4846       len_s[proc] = 0;
4847     } else {
4848       len_si[proc] = owners[proc + 1] - owners[proc] + 1;
4849       len_s[proc]  = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4850     }
4851     if (len_s[proc]) {
4852       merge->nsend++;
4853       nrows = 0;
4854       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4855         if (ai[i + 1] > ai[i]) nrows++;
4856       }
4857       len_si[proc] = 2 * (nrows + 1);
4858       len += len_si[proc];
4859     }
4860   }
4861 
4862   /* determine the number and length of messages to receive for ij-structure */
4863   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4864   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4865 
4866   /* post the Irecv of j-structure */
4867   PetscCall(PetscCommGetNewTag(comm, &tagj));
4868   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4869 
4870   /* post the Isend of j-structure */
4871   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4872 
4873   for (proc = 0, k = 0; proc < size; proc++) {
4874     if (!len_s[proc]) continue;
4875     i = owners[proc];
4876     PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4877     k++;
4878   }
4879 
4880   /* receives and sends of j-structure are complete */
4881   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4882   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4883 
4884   /* send and recv i-structure */
4885   PetscCall(PetscCommGetNewTag(comm, &tagi));
4886   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4887 
4888   PetscCall(PetscMalloc1(len + 1, &buf_s));
4889   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4890   for (proc = 0, k = 0; proc < size; proc++) {
4891     if (!len_s[proc]) continue;
4892     /* form outgoing message for i-structure:
4893          buf_si[0]:                 nrows to be sent
4894                [1:nrows]:           row index (global)
4895                [nrows+1:2*nrows+1]: i-structure index
4896     */
4897     nrows       = len_si[proc] / 2 - 1;
4898     buf_si_i    = buf_si + nrows + 1;
4899     buf_si[0]   = nrows;
4900     buf_si_i[0] = 0;
4901     nrows       = 0;
4902     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4903       anzi = ai[i + 1] - ai[i];
4904       if (anzi) {
4905         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4906         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4907         nrows++;
4908       }
4909     }
4910     PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4911     k++;
4912     buf_si += len_si[proc];
4913   }
4914 
4915   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4916   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4917 
4918   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4919   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4920 
4921   PetscCall(PetscFree(len_si));
4922   PetscCall(PetscFree(len_ri));
4923   PetscCall(PetscFree(rj_waits));
4924   PetscCall(PetscFree2(si_waits, sj_waits));
4925   PetscCall(PetscFree(ri_waits));
4926   PetscCall(PetscFree(buf_s));
4927   PetscCall(PetscFree(status));
4928 
4929   /* compute a local seq matrix in each processor */
4930   /* allocate bi array and free space for accumulating nonzero column info */
4931   PetscCall(PetscMalloc1(m + 1, &bi));
4932   bi[0] = 0;
4933 
4934   /* create and initialize a linked list */
4935   nlnk = N + 1;
4936   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
4937 
4938   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4939   len = ai[owners[rank + 1]] - ai[owners[rank]];
4940   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
4941 
4942   current_space = free_space;
4943 
4944   /* determine symbolic info for each local row */
4945   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4946 
4947   for (k = 0; k < merge->nrecv; k++) {
4948     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4949     nrows       = *buf_ri_k[k];
4950     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4951     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4952   }
4953 
4954   MatPreallocateBegin(comm, m, n, dnz, onz);
4955   len = 0;
4956   for (i = 0; i < m; i++) {
4957     bnzi = 0;
4958     /* add local non-zero cols of this proc's seqmat into lnk */
4959     arow = owners[rank] + i;
4960     anzi = ai[arow + 1] - ai[arow];
4961     aj   = a->j + ai[arow];
4962     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
4963     bnzi += nlnk;
4964     /* add received col data into lnk */
4965     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4966       if (i == *nextrow[k]) {            /* i-th row */
4967         anzi = *(nextai[k] + 1) - *nextai[k];
4968         aj   = buf_rj[k] + *nextai[k];
4969         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
4970         bnzi += nlnk;
4971         nextrow[k]++;
4972         nextai[k]++;
4973       }
4974     }
4975     if (len < bnzi) len = bnzi; /* =max(bnzi) */
4976 
4977     /* if free space is not available, make more free space */
4978     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
4979     /* copy data into free space, then initialize lnk */
4980     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
4981     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
4982 
4983     current_space->array += bnzi;
4984     current_space->local_used += bnzi;
4985     current_space->local_remaining -= bnzi;
4986 
4987     bi[i + 1] = bi[i] + bnzi;
4988   }
4989 
4990   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
4991 
4992   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
4993   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
4994   PetscCall(PetscLLDestroy(lnk, lnkbt));
4995 
4996   /* create symbolic parallel matrix B_mpi */
4997   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
4998   PetscCall(MatCreate(comm, &B_mpi));
4999   if (n == PETSC_DECIDE) {
5000     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
5001   } else {
5002     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
5003   }
5004   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
5005   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
5006   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5007   MatPreallocateEnd(dnz, onz);
5008   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
5009 
5010   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5011   B_mpi->assembled = PETSC_FALSE;
5012   merge->bi        = bi;
5013   merge->bj        = bj;
5014   merge->buf_ri    = buf_ri;
5015   merge->buf_rj    = buf_rj;
5016   merge->coi       = NULL;
5017   merge->coj       = NULL;
5018   merge->owners_co = NULL;
5019 
5020   PetscCall(PetscCommDestroy(&comm));
5021 
5022   /* attach the supporting struct to B_mpi for reuse */
5023   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5024   PetscCall(PetscContainerSetPointer(container, merge));
5025   PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5026   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5027   PetscCall(PetscContainerDestroy(&container));
5028   *mpimat = B_mpi;
5029 
5030   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5031   PetscFunctionReturn(PETSC_SUCCESS);
5032 }
5033 
5034 /*@C
5035   MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5036   matrices from each processor
5037 
5038   Collective
5039 
5040   Input Parameters:
5041 + comm   - the communicators the parallel matrix will live on
5042 . seqmat - the input sequential matrices
5043 . m      - number of local rows (or `PETSC_DECIDE`)
5044 . n      - number of local columns (or `PETSC_DECIDE`)
5045 - scall  - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5046 
5047   Output Parameter:
5048 . mpimat - the parallel matrix generated
5049 
5050   Level: advanced
5051 
5052   Note:
5053   The dimensions of the sequential matrix in each processor MUST be the same.
5054   The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5055   destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat.
5056 
5057 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
5058 @*/
5059 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5060 {
5061   PetscMPIInt size;
5062 
5063   PetscFunctionBegin;
5064   PetscCallMPI(MPI_Comm_size(comm, &size));
5065   if (size == 1) {
5066     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5067     if (scall == MAT_INITIAL_MATRIX) {
5068       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5069     } else {
5070       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5071     }
5072     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5073     PetscFunctionReturn(PETSC_SUCCESS);
5074   }
5075   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5076   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5077   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5078   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5079   PetscFunctionReturn(PETSC_SUCCESS);
5080 }
5081 
5082 /*@
5083   MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix.
5084 
5085   Not Collective
5086 
5087   Input Parameter:
5088 . A - the matrix
5089 
5090   Output Parameter:
5091 . A_loc - the local sequential matrix generated
5092 
5093   Level: developer
5094 
5095   Notes:
5096   The matrix is created by taking `A`'s local rows and putting them into a sequential matrix
5097   with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and
5098   `n` is the global column count obtained with `MatGetSize()`
5099 
5100   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5101 
5102   For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.
5103 
5104   Destroy the matrix with `MatDestroy()`
5105 
5106 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5107 @*/
5108 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5109 {
5110   PetscBool mpi;
5111 
5112   PetscFunctionBegin;
5113   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5114   if (mpi) {
5115     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5116   } else {
5117     *A_loc = A;
5118     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5119   }
5120   PetscFunctionReturn(PETSC_SUCCESS);
5121 }
5122 
5123 /*@
5124   MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.
5125 
5126   Not Collective
5127 
5128   Input Parameters:
5129 + A     - the matrix
5130 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5131 
5132   Output Parameter:
5133 . A_loc - the local sequential matrix generated
5134 
5135   Level: developer
5136 
5137   Notes:
5138   The matrix is created by taking all `A`'s local rows and putting them into a sequential
5139   matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with
5140   `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.
5141 
5142   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5143 
5144   When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
5145   with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
5146   then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
5147   and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.
5148 
5149 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5150 @*/
5151 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
5152 {
5153   Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
5154   Mat_SeqAIJ        *mat, *a, *b;
5155   PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
5156   const PetscScalar *aa, *ba, *aav, *bav;
5157   PetscScalar       *ca, *cam;
5158   PetscMPIInt        size;
5159   PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
5160   PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
5161   PetscBool          match;
5162 
5163   PetscFunctionBegin;
5164   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
5165   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5166   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5167   if (size == 1) {
5168     if (scall == MAT_INITIAL_MATRIX) {
5169       PetscCall(PetscObjectReference((PetscObject)mpimat->A));
5170       *A_loc = mpimat->A;
5171     } else if (scall == MAT_REUSE_MATRIX) {
5172       PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
5173     }
5174     PetscFunctionReturn(PETSC_SUCCESS);
5175   }
5176 
5177   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5178   a  = (Mat_SeqAIJ *)(mpimat->A)->data;
5179   b  = (Mat_SeqAIJ *)(mpimat->B)->data;
5180   ai = a->i;
5181   aj = a->j;
5182   bi = b->i;
5183   bj = b->j;
5184   PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
5185   PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
5186   aa = aav;
5187   ba = bav;
5188   if (scall == MAT_INITIAL_MATRIX) {
5189     PetscCall(PetscMalloc1(1 + am, &ci));
5190     ci[0] = 0;
5191     for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
5192     PetscCall(PetscMalloc1(1 + ci[am], &cj));
5193     PetscCall(PetscMalloc1(1 + ci[am], &ca));
5194     k = 0;
5195     for (i = 0; i < am; i++) {
5196       ncols_o = bi[i + 1] - bi[i];
5197       ncols_d = ai[i + 1] - ai[i];
5198       /* off-diagonal portion of A */
5199       for (jo = 0; jo < ncols_o; jo++) {
5200         col = cmap[*bj];
5201         if (col >= cstart) break;
5202         cj[k] = col;
5203         bj++;
5204         ca[k++] = *ba++;
5205       }
5206       /* diagonal portion of A */
5207       for (j = 0; j < ncols_d; j++) {
5208         cj[k]   = cstart + *aj++;
5209         ca[k++] = *aa++;
5210       }
5211       /* off-diagonal portion of A */
5212       for (j = jo; j < ncols_o; j++) {
5213         cj[k]   = cmap[*bj++];
5214         ca[k++] = *ba++;
5215       }
5216     }
5217     /* put together the new matrix */
5218     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
5219     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5220     /* Since these are PETSc arrays, change flags to free them as necessary. */
5221     mat          = (Mat_SeqAIJ *)(*A_loc)->data;
5222     mat->free_a  = PETSC_TRUE;
5223     mat->free_ij = PETSC_TRUE;
5224     mat->nonew   = 0;
5225   } else if (scall == MAT_REUSE_MATRIX) {
5226     mat = (Mat_SeqAIJ *)(*A_loc)->data;
5227     ci  = mat->i;
5228     cj  = mat->j;
5229     PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
5230     for (i = 0; i < am; i++) {
5231       /* off-diagonal portion of A */
5232       ncols_o = bi[i + 1] - bi[i];
5233       for (jo = 0; jo < ncols_o; jo++) {
5234         col = cmap[*bj];
5235         if (col >= cstart) break;
5236         *cam++ = *ba++;
5237         bj++;
5238       }
5239       /* diagonal portion of A */
5240       ncols_d = ai[i + 1] - ai[i];
5241       for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
5242       /* off-diagonal portion of A */
5243       for (j = jo; j < ncols_o; j++) {
5244         *cam++ = *ba++;
5245         bj++;
5246       }
5247     }
5248     PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
5249   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5250   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
5251   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
5252   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5253   PetscFunctionReturn(PETSC_SUCCESS);
5254 }
5255 
5256 /*@
5257   MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5258   mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part
5259 
5260   Not Collective
5261 
5262   Input Parameters:
5263 + A     - the matrix
5264 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5265 
5266   Output Parameters:
5267 + glob  - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5268 - A_loc - the local sequential matrix generated
5269 
5270   Level: developer
5271 
5272   Note:
5273   This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal
5274   part, then those associated with the off diagonal part (in its local ordering)
5275 
5276 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5277 @*/
5278 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5279 {
5280   Mat             Ao, Ad;
5281   const PetscInt *cmap;
5282   PetscMPIInt     size;
5283   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5284 
5285   PetscFunctionBegin;
5286   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5287   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5288   if (size == 1) {
5289     if (scall == MAT_INITIAL_MATRIX) {
5290       PetscCall(PetscObjectReference((PetscObject)Ad));
5291       *A_loc = Ad;
5292     } else if (scall == MAT_REUSE_MATRIX) {
5293       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5294     }
5295     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5296     PetscFunctionReturn(PETSC_SUCCESS);
5297   }
5298   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5299   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5300   if (f) {
5301     PetscCall((*f)(A, scall, glob, A_loc));
5302   } else {
5303     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5304     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5305     Mat_SeqAIJ        *c;
5306     PetscInt          *ai = a->i, *aj = a->j;
5307     PetscInt          *bi = b->i, *bj = b->j;
5308     PetscInt          *ci, *cj;
5309     const PetscScalar *aa, *ba;
5310     PetscScalar       *ca;
5311     PetscInt           i, j, am, dn, on;
5312 
5313     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5314     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5315     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5316     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5317     if (scall == MAT_INITIAL_MATRIX) {
5318       PetscInt k;
5319       PetscCall(PetscMalloc1(1 + am, &ci));
5320       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5321       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5322       ci[0] = 0;
5323       for (i = 0, k = 0; i < am; i++) {
5324         const PetscInt ncols_o = bi[i + 1] - bi[i];
5325         const PetscInt ncols_d = ai[i + 1] - ai[i];
5326         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5327         /* diagonal portion of A */
5328         for (j = 0; j < ncols_d; j++, k++) {
5329           cj[k] = *aj++;
5330           ca[k] = *aa++;
5331         }
5332         /* off-diagonal portion of A */
5333         for (j = 0; j < ncols_o; j++, k++) {
5334           cj[k] = dn + *bj++;
5335           ca[k] = *ba++;
5336         }
5337       }
5338       /* put together the new matrix */
5339       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5340       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5341       /* Since these are PETSc arrays, change flags to free them as necessary. */
5342       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5343       c->free_a  = PETSC_TRUE;
5344       c->free_ij = PETSC_TRUE;
5345       c->nonew   = 0;
5346       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5347     } else if (scall == MAT_REUSE_MATRIX) {
5348       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5349       for (i = 0; i < am; i++) {
5350         const PetscInt ncols_d = ai[i + 1] - ai[i];
5351         const PetscInt ncols_o = bi[i + 1] - bi[i];
5352         /* diagonal portion of A */
5353         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5354         /* off-diagonal portion of A */
5355         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5356       }
5357       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5358     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5359     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5360     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa));
5361     if (glob) {
5362       PetscInt cst, *gidx;
5363 
5364       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5365       PetscCall(PetscMalloc1(dn + on, &gidx));
5366       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5367       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5368       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5369     }
5370   }
5371   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5372   PetscFunctionReturn(PETSC_SUCCESS);
5373 }
5374 
5375 /*@C
5376   MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5377 
5378   Not Collective
5379 
5380   Input Parameters:
5381 + A     - the matrix
5382 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5383 . row   - index set of rows to extract (or `NULL`)
5384 - col   - index set of columns to extract (or `NULL`)
5385 
5386   Output Parameter:
5387 . A_loc - the local sequential matrix generated
5388 
5389   Level: developer
5390 
5391 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5392 @*/
5393 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
5394 {
5395   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5396   PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
5397   IS          isrowa, iscola;
5398   Mat        *aloc;
5399   PetscBool   match;
5400 
5401   PetscFunctionBegin;
5402   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
5403   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5404   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5405   if (!row) {
5406     start = A->rmap->rstart;
5407     end   = A->rmap->rend;
5408     PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
5409   } else {
5410     isrowa = *row;
5411   }
5412   if (!col) {
5413     start = A->cmap->rstart;
5414     cmap  = a->garray;
5415     nzA   = a->A->cmap->n;
5416     nzB   = a->B->cmap->n;
5417     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5418     ncols = 0;
5419     for (i = 0; i < nzB; i++) {
5420       if (cmap[i] < start) idx[ncols++] = cmap[i];
5421       else break;
5422     }
5423     imark = i;
5424     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
5425     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
5426     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
5427   } else {
5428     iscola = *col;
5429   }
5430   if (scall != MAT_INITIAL_MATRIX) {
5431     PetscCall(PetscMalloc1(1, &aloc));
5432     aloc[0] = *A_loc;
5433   }
5434   PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
5435   if (!col) { /* attach global id of condensed columns */
5436     PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
5437   }
5438   *A_loc = aloc[0];
5439   PetscCall(PetscFree(aloc));
5440   if (!row) PetscCall(ISDestroy(&isrowa));
5441   if (!col) PetscCall(ISDestroy(&iscola));
5442   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5443   PetscFunctionReturn(PETSC_SUCCESS);
5444 }
5445 
5446 /*
5447  * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched.
5448  * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based
5449  * on a global size.
5450  * */
5451 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
5452 {
5453   Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
5454   Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth;
5455   PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
5456   PetscMPIInt            owner;
5457   PetscSFNode           *iremote, *oiremote;
5458   const PetscInt        *lrowindices;
5459   PetscSF                sf, osf;
5460   PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
5461   PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
5462   MPI_Comm               comm;
5463   ISLocalToGlobalMapping mapping;
5464   const PetscScalar     *pd_a, *po_a;
5465 
5466   PetscFunctionBegin;
5467   PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
5468   /* plocalsize is the number of roots
5469    * nrows is the number of leaves
5470    * */
5471   PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
5472   PetscCall(ISGetLocalSize(rows, &nrows));
5473   PetscCall(PetscCalloc1(nrows, &iremote));
5474   PetscCall(ISGetIndices(rows, &lrowindices));
5475   for (i = 0; i < nrows; i++) {
5476     /* Find a remote index and an owner for a row
5477      * The row could be local or remote
5478      * */
5479     owner = 0;
5480     lidx  = 0;
5481     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
5482     iremote[i].index = lidx;
5483     iremote[i].rank  = owner;
5484   }
5485   /* Create SF to communicate how many nonzero columns for each row */
5486   PetscCall(PetscSFCreate(comm, &sf));
5487   /* SF will figure out the number of nonzero colunms for each row, and their
5488    * offsets
5489    * */
5490   PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5491   PetscCall(PetscSFSetFromOptions(sf));
5492   PetscCall(PetscSFSetUp(sf));
5493 
5494   PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
5495   PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
5496   PetscCall(PetscCalloc1(nrows, &pnnz));
5497   roffsets[0] = 0;
5498   roffsets[1] = 0;
5499   for (i = 0; i < plocalsize; i++) {
5500     /* diag */
5501     nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
5502     /* off diag */
5503     nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
5504     /* compute offsets so that we relative location for each row */
5505     roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
5506     roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
5507   }
5508   PetscCall(PetscCalloc1(2 * nrows, &nlcols));
5509   PetscCall(PetscCalloc1(2 * nrows, &loffsets));
5510   /* 'r' means root, and 'l' means leaf */
5511   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5512   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5513   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5514   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5515   PetscCall(PetscSFDestroy(&sf));
5516   PetscCall(PetscFree(roffsets));
5517   PetscCall(PetscFree(nrcols));
5518   dntotalcols = 0;
5519   ontotalcols = 0;
5520   ncol        = 0;
5521   for (i = 0; i < nrows; i++) {
5522     pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
5523     ncol    = PetscMax(pnnz[i], ncol);
5524     /* diag */
5525     dntotalcols += nlcols[i * 2 + 0];
5526     /* off diag */
5527     ontotalcols += nlcols[i * 2 + 1];
5528   }
5529   /* We do not need to figure the right number of columns
5530    * since all the calculations will be done by going through the raw data
5531    * */
5532   PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
5533   PetscCall(MatSetUp(*P_oth));
5534   PetscCall(PetscFree(pnnz));
5535   p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5536   /* diag */
5537   PetscCall(PetscCalloc1(dntotalcols, &iremote));
5538   /* off diag */
5539   PetscCall(PetscCalloc1(ontotalcols, &oiremote));
5540   /* diag */
5541   PetscCall(PetscCalloc1(dntotalcols, &ilocal));
5542   /* off diag */
5543   PetscCall(PetscCalloc1(ontotalcols, &oilocal));
5544   dntotalcols = 0;
5545   ontotalcols = 0;
5546   ntotalcols  = 0;
5547   for (i = 0; i < nrows; i++) {
5548     owner = 0;
5549     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
5550     /* Set iremote for diag matrix */
5551     for (j = 0; j < nlcols[i * 2 + 0]; j++) {
5552       iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
5553       iremote[dntotalcols].rank  = owner;
5554       /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
5555       ilocal[dntotalcols++] = ntotalcols++;
5556     }
5557     /* off diag */
5558     for (j = 0; j < nlcols[i * 2 + 1]; j++) {
5559       oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
5560       oiremote[ontotalcols].rank  = owner;
5561       oilocal[ontotalcols++]      = ntotalcols++;
5562     }
5563   }
5564   PetscCall(ISRestoreIndices(rows, &lrowindices));
5565   PetscCall(PetscFree(loffsets));
5566   PetscCall(PetscFree(nlcols));
5567   PetscCall(PetscSFCreate(comm, &sf));
5568   /* P serves as roots and P_oth is leaves
5569    * Diag matrix
5570    * */
5571   PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5572   PetscCall(PetscSFSetFromOptions(sf));
5573   PetscCall(PetscSFSetUp(sf));
5574 
5575   PetscCall(PetscSFCreate(comm, &osf));
5576   /* Off diag */
5577   PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
5578   PetscCall(PetscSFSetFromOptions(osf));
5579   PetscCall(PetscSFSetUp(osf));
5580   PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5581   PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5582   /* We operate on the matrix internal data for saving memory */
5583   PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5584   PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5585   PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
5586   /* Convert to global indices for diag matrix */
5587   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
5588   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5589   /* We want P_oth store global indices */
5590   PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
5591   /* Use memory scalable approach */
5592   PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
5593   PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
5594   PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5595   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5596   /* Convert back to local indices */
5597   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
5598   PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5599   nout = 0;
5600   PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
5601   PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
5602   PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
5603   /* Exchange values */
5604   PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5605   PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5606   PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5607   PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5608   /* Stop PETSc from shrinking memory */
5609   for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
5610   PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
5611   PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
5612   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5613   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
5614   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
5615   PetscCall(PetscSFDestroy(&sf));
5616   PetscCall(PetscSFDestroy(&osf));
5617   PetscFunctionReturn(PETSC_SUCCESS);
5618 }
5619 
5620 /*
5621  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5622  * This supports MPIAIJ and MAIJ
5623  * */
5624 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
5625 {
5626   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
5627   Mat_SeqAIJ *p_oth;
5628   IS          rows, map;
5629   PetscHMapI  hamp;
5630   PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
5631   MPI_Comm    comm;
5632   PetscSF     sf, osf;
5633   PetscBool   has;
5634 
5635   PetscFunctionBegin;
5636   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5637   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
5638   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5639    *  and then create a submatrix (that often is an overlapping matrix)
5640    * */
5641   if (reuse == MAT_INITIAL_MATRIX) {
5642     /* Use a hash table to figure out unique keys */
5643     PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
5644     PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
5645     count = 0;
5646     /* Assume that  a->g is sorted, otherwise the following does not make sense */
5647     for (i = 0; i < a->B->cmap->n; i++) {
5648       key = a->garray[i] / dof;
5649       PetscCall(PetscHMapIHas(hamp, key, &has));
5650       if (!has) {
5651         mapping[i] = count;
5652         PetscCall(PetscHMapISet(hamp, key, count++));
5653       } else {
5654         /* Current 'i' has the same value the previous step */
5655         mapping[i] = count - 1;
5656       }
5657     }
5658     PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
5659     PetscCall(PetscHMapIGetSize(hamp, &htsize));
5660     PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
5661     PetscCall(PetscCalloc1(htsize, &rowindices));
5662     off = 0;
5663     PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
5664     PetscCall(PetscHMapIDestroy(&hamp));
5665     PetscCall(PetscSortInt(htsize, rowindices));
5666     PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
5667     /* In case, the matrix was already created but users want to recreate the matrix */
5668     PetscCall(MatDestroy(P_oth));
5669     PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
5670     PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
5671     PetscCall(ISDestroy(&map));
5672     PetscCall(ISDestroy(&rows));
5673   } else if (reuse == MAT_REUSE_MATRIX) {
5674     /* If matrix was already created, we simply update values using SF objects
5675      * that as attached to the matrix earlier.
5676      */
5677     const PetscScalar *pd_a, *po_a;
5678 
5679     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
5680     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
5681     PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
5682     p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5683     /* Update values in place */
5684     PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5685     PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5686     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5687     PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5688     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5689     PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5690     PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5691     PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5692   } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
5693   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
5694   PetscFunctionReturn(PETSC_SUCCESS);
5695 }
5696 
5697 /*@C
5698   MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A`
5699 
5700   Collective
5701 
5702   Input Parameters:
5703 + A     - the first matrix in `MATMPIAIJ` format
5704 . B     - the second matrix in `MATMPIAIJ` format
5705 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5706 
5707   Output Parameters:
5708 + rowb  - On input index sets of rows of B to extract (or `NULL`), modified on output
5709 . colb  - On input index sets of columns of B to extract (or `NULL`), modified on output
5710 - B_seq - the sequential matrix generated
5711 
5712   Level: developer
5713 
5714 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
5715 @*/
5716 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5717 {
5718   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5719   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5720   IS          isrowb, iscolb;
5721   Mat        *bseq = NULL;
5722 
5723   PetscFunctionBegin;
5724   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5725              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5726   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5727 
5728   if (scall == MAT_INITIAL_MATRIX) {
5729     start = A->cmap->rstart;
5730     cmap  = a->garray;
5731     nzA   = a->A->cmap->n;
5732     nzB   = a->B->cmap->n;
5733     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5734     ncols = 0;
5735     for (i = 0; i < nzB; i++) { /* row < local row index */
5736       if (cmap[i] < start) idx[ncols++] = cmap[i];
5737       else break;
5738     }
5739     imark = i;
5740     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5741     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5742     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5743     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5744   } else {
5745     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5746     isrowb = *rowb;
5747     iscolb = *colb;
5748     PetscCall(PetscMalloc1(1, &bseq));
5749     bseq[0] = *B_seq;
5750   }
5751   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5752   *B_seq = bseq[0];
5753   PetscCall(PetscFree(bseq));
5754   if (!rowb) {
5755     PetscCall(ISDestroy(&isrowb));
5756   } else {
5757     *rowb = isrowb;
5758   }
5759   if (!colb) {
5760     PetscCall(ISDestroy(&iscolb));
5761   } else {
5762     *colb = iscolb;
5763   }
5764   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5765   PetscFunctionReturn(PETSC_SUCCESS);
5766 }
5767 
5768 /*
5769     MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns
5770     of the OFF-DIAGONAL portion of local A
5771 
5772     Collective
5773 
5774    Input Parameters:
5775 +    A,B - the matrices in `MATMPIAIJ` format
5776 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5777 
5778    Output Parameter:
5779 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5780 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5781 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5782 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5783 
5784     Developer Note:
5785     This directly accesses information inside the VecScatter associated with the matrix-vector product
5786      for this matrix. This is not desirable..
5787 
5788     Level: developer
5789 
5790 */
5791 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5792 {
5793   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5794   Mat_SeqAIJ        *b_oth;
5795   VecScatter         ctx;
5796   MPI_Comm           comm;
5797   const PetscMPIInt *rprocs, *sprocs;
5798   const PetscInt    *srow, *rstarts, *sstarts;
5799   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5800   PetscInt           i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len;
5801   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5802   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5803   PetscMPIInt        size, tag, rank, nreqs;
5804 
5805   PetscFunctionBegin;
5806   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5807   PetscCallMPI(MPI_Comm_size(comm, &size));
5808 
5809   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5810              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5811   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5812   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5813 
5814   if (size == 1) {
5815     startsj_s = NULL;
5816     bufa_ptr  = NULL;
5817     *B_oth    = NULL;
5818     PetscFunctionReturn(PETSC_SUCCESS);
5819   }
5820 
5821   ctx = a->Mvctx;
5822   tag = ((PetscObject)ctx)->tag;
5823 
5824   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5825   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5826   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5827   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5828   PetscCall(PetscMalloc1(nreqs, &reqs));
5829   rwaits = reqs;
5830   swaits = reqs + nrecvs;
5831 
5832   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5833   if (scall == MAT_INITIAL_MATRIX) {
5834     /* i-array */
5835     /*  post receives */
5836     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5837     for (i = 0; i < nrecvs; i++) {
5838       rowlen = rvalues + rstarts[i] * rbs;
5839       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5840       PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5841     }
5842 
5843     /* pack the outgoing message */
5844     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5845 
5846     sstartsj[0] = 0;
5847     rstartsj[0] = 0;
5848     len         = 0; /* total length of j or a array to be sent */
5849     if (nsends) {
5850       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5851       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5852     }
5853     for (i = 0; i < nsends; i++) {
5854       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5855       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5856       for (j = 0; j < nrows; j++) {
5857         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5858         for (l = 0; l < sbs; l++) {
5859           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5860 
5861           rowlen[j * sbs + l] = ncols;
5862 
5863           len += ncols;
5864           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5865         }
5866         k++;
5867       }
5868       PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5869 
5870       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5871     }
5872     /* recvs and sends of i-array are completed */
5873     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5874     PetscCall(PetscFree(svalues));
5875 
5876     /* allocate buffers for sending j and a arrays */
5877     PetscCall(PetscMalloc1(len + 1, &bufj));
5878     PetscCall(PetscMalloc1(len + 1, &bufa));
5879 
5880     /* create i-array of B_oth */
5881     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5882 
5883     b_othi[0] = 0;
5884     len       = 0; /* total length of j or a array to be received */
5885     k         = 0;
5886     for (i = 0; i < nrecvs; i++) {
5887       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5888       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5889       for (j = 0; j < nrows; j++) {
5890         b_othi[k + 1] = b_othi[k] + rowlen[j];
5891         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5892         k++;
5893       }
5894       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5895     }
5896     PetscCall(PetscFree(rvalues));
5897 
5898     /* allocate space for j and a arrays of B_oth */
5899     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5900     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5901 
5902     /* j-array */
5903     /*  post receives of j-array */
5904     for (i = 0; i < nrecvs; i++) {
5905       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5906       PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5907     }
5908 
5909     /* pack the outgoing message j-array */
5910     if (nsends) k = sstarts[0];
5911     for (i = 0; i < nsends; i++) {
5912       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5913       bufJ  = bufj + sstartsj[i];
5914       for (j = 0; j < nrows; j++) {
5915         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5916         for (ll = 0; ll < sbs; ll++) {
5917           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5918           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5919           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5920         }
5921       }
5922       PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5923     }
5924 
5925     /* recvs and sends of j-array are completed */
5926     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5927   } else if (scall == MAT_REUSE_MATRIX) {
5928     sstartsj = *startsj_s;
5929     rstartsj = *startsj_r;
5930     bufa     = *bufa_ptr;
5931     b_oth    = (Mat_SeqAIJ *)(*B_oth)->data;
5932     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5933   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5934 
5935   /* a-array */
5936   /*  post receives of a-array */
5937   for (i = 0; i < nrecvs; i++) {
5938     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5939     PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
5940   }
5941 
5942   /* pack the outgoing message a-array */
5943   if (nsends) k = sstarts[0];
5944   for (i = 0; i < nsends; i++) {
5945     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5946     bufA  = bufa + sstartsj[i];
5947     for (j = 0; j < nrows; j++) {
5948       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5949       for (ll = 0; ll < sbs; ll++) {
5950         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5951         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
5952         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5953       }
5954     }
5955     PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
5956   }
5957   /* recvs and sends of a-array are completed */
5958   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5959   PetscCall(PetscFree(reqs));
5960 
5961   if (scall == MAT_INITIAL_MATRIX) {
5962     /* put together the new matrix */
5963     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
5964 
5965     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5966     /* Since these are PETSc arrays, change flags to free them as necessary. */
5967     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5968     b_oth->free_a  = PETSC_TRUE;
5969     b_oth->free_ij = PETSC_TRUE;
5970     b_oth->nonew   = 0;
5971 
5972     PetscCall(PetscFree(bufj));
5973     if (!startsj_s || !bufa_ptr) {
5974       PetscCall(PetscFree2(sstartsj, rstartsj));
5975       PetscCall(PetscFree(bufa_ptr));
5976     } else {
5977       *startsj_s = sstartsj;
5978       *startsj_r = rstartsj;
5979       *bufa_ptr  = bufa;
5980     }
5981   } else if (scall == MAT_REUSE_MATRIX) {
5982     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
5983   }
5984 
5985   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
5986   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
5987   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
5988   PetscFunctionReturn(PETSC_SUCCESS);
5989 }
5990 
5991 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
5992 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
5993 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
5994 #if defined(PETSC_HAVE_MKL_SPARSE)
5995 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
5996 #endif
5997 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
5998 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
5999 #if defined(PETSC_HAVE_ELEMENTAL)
6000 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
6001 #endif
6002 #if defined(PETSC_HAVE_SCALAPACK)
6003 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6004 #endif
6005 #if defined(PETSC_HAVE_HYPRE)
6006 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
6007 #endif
6008 #if defined(PETSC_HAVE_CUDA)
6009 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
6010 #endif
6011 #if defined(PETSC_HAVE_HIP)
6012 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
6013 #endif
6014 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6015 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6016 #endif
6017 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6018 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6019 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6020 
6021 /*
6022     Computes (B'*A')' since computing B*A directly is untenable
6023 
6024                n                       p                          p
6025         [             ]       [             ]         [                 ]
6026       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6027         [             ]       [             ]         [                 ]
6028 
6029 */
6030 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6031 {
6032   Mat At, Bt, Ct;
6033 
6034   PetscFunctionBegin;
6035   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6036   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6037   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
6038   PetscCall(MatDestroy(&At));
6039   PetscCall(MatDestroy(&Bt));
6040   PetscCall(MatTransposeSetPrecursor(Ct, C));
6041   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6042   PetscCall(MatDestroy(&Ct));
6043   PetscFunctionReturn(PETSC_SUCCESS);
6044 }
6045 
6046 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6047 {
6048   PetscBool cisdense;
6049 
6050   PetscFunctionBegin;
6051   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6052   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6053   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6054   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6055   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6056   PetscCall(MatSetUp(C));
6057 
6058   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6059   PetscFunctionReturn(PETSC_SUCCESS);
6060 }
6061 
6062 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6063 {
6064   Mat_Product *product = C->product;
6065   Mat          A = product->A, B = product->B;
6066 
6067   PetscFunctionBegin;
6068   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
6069              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6070   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6071   C->ops->productsymbolic = MatProductSymbolic_AB;
6072   PetscFunctionReturn(PETSC_SUCCESS);
6073 }
6074 
6075 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6076 {
6077   Mat_Product *product = C->product;
6078 
6079   PetscFunctionBegin;
6080   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6081   PetscFunctionReturn(PETSC_SUCCESS);
6082 }
6083 
6084 /*
6085    Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6086 
6087   Input Parameters:
6088 
6089     j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
6090     j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)
6091 
6092     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6093 
6094     For Set1, j1[] contains column indices of the nonzeros.
6095     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
6096     respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted,
6097     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6098 
6099     Similar for Set2.
6100 
6101     This routine merges the two sets of nonzeros row by row and removes repeats.
6102 
6103   Output Parameters: (memory is allocated by the caller)
6104 
6105     i[],j[]: the CSR of the merged matrix, which has m rows.
6106     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6107     imap2[]: similar to imap1[], but for Set2.
6108     Note we order nonzeros row-by-row and from left to right.
6109 */
6110 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
6111 {
6112   PetscInt   r, m; /* Row index of mat */
6113   PetscCount t, t1, t2, b1, e1, b2, e2;
6114 
6115   PetscFunctionBegin;
6116   PetscCall(MatGetLocalSize(mat, &m, NULL));
6117   t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */
6118   i[0]        = 0;
6119   for (r = 0; r < m; r++) { /* Do row by row merging */
6120     b1 = rowBegin1[r];
6121     e1 = rowEnd1[r];
6122     b2 = rowBegin2[r];
6123     e2 = rowEnd2[r];
6124     while (b1 < e1 && b2 < e2) {
6125       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6126         j[t]      = j1[b1];
6127         imap1[t1] = t;
6128         imap2[t2] = t;
6129         b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
6130         b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
6131         t1++;
6132         t2++;
6133         t++;
6134       } else if (j1[b1] < j2[b2]) {
6135         j[t]      = j1[b1];
6136         imap1[t1] = t;
6137         b1 += jmap1[t1 + 1] - jmap1[t1];
6138         t1++;
6139         t++;
6140       } else {
6141         j[t]      = j2[b2];
6142         imap2[t2] = t;
6143         b2 += jmap2[t2 + 1] - jmap2[t2];
6144         t2++;
6145         t++;
6146       }
6147     }
6148     /* Merge the remaining in either j1[] or j2[] */
6149     while (b1 < e1) {
6150       j[t]      = j1[b1];
6151       imap1[t1] = t;
6152       b1 += jmap1[t1 + 1] - jmap1[t1];
6153       t1++;
6154       t++;
6155     }
6156     while (b2 < e2) {
6157       j[t]      = j2[b2];
6158       imap2[t2] = t;
6159       b2 += jmap2[t2 + 1] - jmap2[t2];
6160       t2++;
6161       t++;
6162     }
6163     i[r + 1] = t;
6164   }
6165   PetscFunctionReturn(PETSC_SUCCESS);
6166 }
6167 
6168 /*
6169   Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6170 
6171   Input Parameters:
6172     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6173     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6174       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6175 
6176       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6177       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6178 
6179   Output Parameters:
6180     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6181     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6182       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6183       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6184 
6185     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6186       Atot: number of entries belonging to the diagonal block.
6187       Annz: number of unique nonzeros belonging to the diagonal block.
6188       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6189         repeats (i.e., same 'i,j' pair).
6190       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6191         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6192 
6193       Atot: number of entries belonging to the diagonal block
6194       Annz: number of unique nonzeros belonging to the diagonal block.
6195 
6196     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6197 
6198     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6199 */
6200 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
6201 {
6202   PetscInt    cstart, cend, rstart, rend, row, col;
6203   PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6204   PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6205   PetscCount  k, m, p, q, r, s, mid;
6206   PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;
6207 
6208   PetscFunctionBegin;
6209   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6210   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6211   m = rend - rstart;
6212 
6213   /* Skip negative rows */
6214   for (k = 0; k < n; k++)
6215     if (i[k] >= 0) break;
6216 
6217   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6218      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6219   */
6220   while (k < n) {
6221     row = i[k];
6222     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6223     for (s = k; s < n; s++)
6224       if (i[s] != row) break;
6225 
6226     /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */
6227     for (p = k; p < s; p++) {
6228       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT;
6229       else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
6230     }
6231     PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
6232     PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6233     rowBegin[row - rstart] = k;
6234     rowMid[row - rstart]   = mid;
6235     rowEnd[row - rstart]   = s;
6236 
6237     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6238     Atot += mid - k;
6239     Btot += s - mid;
6240 
6241     /* Count unique nonzeros of this diag row */
6242     for (p = k; p < mid;) {
6243       col = j[p];
6244       do {
6245         j[p] += PETSC_MAX_INT; /* Revert the modified diagonal indices */
6246         p++;
6247       } while (p < mid && j[p] == col);
6248       Annz++;
6249     }
6250 
6251     /* Count unique nonzeros of this offdiag row */
6252     for (p = mid; p < s;) {
6253       col = j[p];
6254       do {
6255         p++;
6256       } while (p < s && j[p] == col);
6257       Bnnz++;
6258     }
6259     k = s;
6260   }
6261 
6262   /* Allocation according to Atot, Btot, Annz, Bnnz */
6263   PetscCall(PetscMalloc1(Atot, &Aperm));
6264   PetscCall(PetscMalloc1(Btot, &Bperm));
6265   PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
6266   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));
6267 
6268   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6269   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6270   for (r = 0; r < m; r++) {
6271     k   = rowBegin[r];
6272     mid = rowMid[r];
6273     s   = rowEnd[r];
6274     PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k));
6275     PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid));
6276     Atot += mid - k;
6277     Btot += s - mid;
6278 
6279     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6280     for (p = k; p < mid;) {
6281       col = j[p];
6282       q   = p;
6283       do {
6284         p++;
6285       } while (p < mid && j[p] == col);
6286       Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
6287       Annz++;
6288     }
6289 
6290     for (p = mid; p < s;) {
6291       col = j[p];
6292       q   = p;
6293       do {
6294         p++;
6295       } while (p < s && j[p] == col);
6296       Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
6297       Bnnz++;
6298     }
6299   }
6300   /* Output */
6301   *Aperm_ = Aperm;
6302   *Annz_  = Annz;
6303   *Atot_  = Atot;
6304   *Ajmap_ = Ajmap;
6305   *Bperm_ = Bperm;
6306   *Bnnz_  = Bnnz;
6307   *Btot_  = Btot;
6308   *Bjmap_ = Bjmap;
6309   PetscFunctionReturn(PETSC_SUCCESS);
6310 }
6311 
6312 /*
6313   Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6314 
6315   Input Parameters:
6316     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6317     nnz:  number of unique nonzeros in the merged matrix
6318     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6319     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6320 
6321   Output Parameter: (memory is allocated by the caller)
6322     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6323 
6324   Example:
6325     nnz1 = 4
6326     nnz  = 6
6327     imap = [1,3,4,5]
6328     jmap = [0,3,5,6,7]
6329    then,
6330     jmap_new = [0,0,3,3,5,6,7]
6331 */
6332 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6333 {
6334   PetscCount k, p;
6335 
6336   PetscFunctionBegin;
6337   jmap_new[0] = 0;
6338   p           = nnz;                /* p loops over jmap_new[] backwards */
6339   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6340     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6341   }
6342   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6343   PetscFunctionReturn(PETSC_SUCCESS);
6344 }
6345 
6346 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data)
6347 {
6348   MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data;
6349 
6350   PetscFunctionBegin;
6351   PetscCall(PetscSFDestroy(&coo->sf));
6352   PetscCall(PetscFree(coo->Aperm1));
6353   PetscCall(PetscFree(coo->Bperm1));
6354   PetscCall(PetscFree(coo->Ajmap1));
6355   PetscCall(PetscFree(coo->Bjmap1));
6356   PetscCall(PetscFree(coo->Aimap2));
6357   PetscCall(PetscFree(coo->Bimap2));
6358   PetscCall(PetscFree(coo->Aperm2));
6359   PetscCall(PetscFree(coo->Bperm2));
6360   PetscCall(PetscFree(coo->Ajmap2));
6361   PetscCall(PetscFree(coo->Bjmap2));
6362   PetscCall(PetscFree(coo->Cperm1));
6363   PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
6364   PetscCall(PetscFree(coo));
6365   PetscFunctionReturn(PETSC_SUCCESS);
6366 }
6367 
6368 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6369 {
6370   MPI_Comm             comm;
6371   PetscMPIInt          rank, size;
6372   PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6373   PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
6374   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6375   PetscContainer       container;
6376   MatCOOStruct_MPIAIJ *coo;
6377 
6378   PetscFunctionBegin;
6379   PetscCall(PetscFree(mpiaij->garray));
6380   PetscCall(VecDestroy(&mpiaij->lvec));
6381 #if defined(PETSC_USE_CTABLE)
6382   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6383 #else
6384   PetscCall(PetscFree(mpiaij->colmap));
6385 #endif
6386   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6387   mat->assembled     = PETSC_FALSE;
6388   mat->was_assembled = PETSC_FALSE;
6389 
6390   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6391   PetscCallMPI(MPI_Comm_size(comm, &size));
6392   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6393   PetscCall(PetscLayoutSetUp(mat->rmap));
6394   PetscCall(PetscLayoutSetUp(mat->cmap));
6395   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6396   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6397   PetscCall(MatGetLocalSize(mat, &m, &n));
6398   PetscCall(MatGetSize(mat, &M, &N));
6399 
6400   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6401   /* entries come first, then local rows, then remote rows.                     */
6402   PetscCount n1 = coo_n, *perm1;
6403   PetscInt  *i1 = coo_i, *j1 = coo_j;
6404 
6405   PetscCall(PetscMalloc1(n1, &perm1));
6406   for (k = 0; k < n1; k++) perm1[k] = k;
6407 
6408   /* Manipulate indices so that entries with negative row or col indices will have smallest
6409      row indices, local entries will have greater but negative row indices, and remote entries
6410      will have positive row indices.
6411   */
6412   for (k = 0; k < n1; k++) {
6413     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT;                /* e.g., -2^31, minimal to move them ahead */
6414     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6415     else {
6416       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6417       if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6418     }
6419   }
6420 
6421   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6422   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6423 
6424   /* Advance k to the first entry we need to take care of */
6425   for (k = 0; k < n1; k++)
6426     if (i1[k] > PETSC_MIN_INT) break;
6427   PetscInt i1start = k;
6428 
6429   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */
6430   for (; k < rem; k++) i1[k] += PETSC_MAX_INT;                                    /* Revert row indices of local rows*/
6431 
6432   /*           Send remote rows to their owner                                  */
6433   /* Find which rows should be sent to which remote ranks*/
6434   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6435   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6436   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6437   const PetscInt *ranges;
6438   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6439 
6440   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6441   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6442   for (k = rem; k < n1;) {
6443     PetscMPIInt owner;
6444     PetscInt    firstRow, lastRow;
6445 
6446     /* Locate a row range */
6447     firstRow = i1[k]; /* first row of this owner */
6448     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6449     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6450 
6451     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6452     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6453 
6454     /* All entries in [k,p) belong to this remote owner */
6455     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6456       PetscMPIInt *sendto2;
6457       PetscInt    *nentries2;
6458       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6459 
6460       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6461       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6462       PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1));
6463       PetscCall(PetscFree2(sendto, nentries2));
6464       sendto   = sendto2;
6465       nentries = nentries2;
6466       maxNsend = maxNsend2;
6467     }
6468     sendto[nsend]   = owner;
6469     nentries[nsend] = p - k;
6470     PetscCall(PetscCountCast(p - k, &nentries[nsend]));
6471     nsend++;
6472     k = p;
6473   }
6474 
6475   /* Build 1st SF to know offsets on remote to send data */
6476   PetscSF      sf1;
6477   PetscInt     nroots = 1, nroots2 = 0;
6478   PetscInt     nleaves = nsend, nleaves2 = 0;
6479   PetscInt    *offsets;
6480   PetscSFNode *iremote;
6481 
6482   PetscCall(PetscSFCreate(comm, &sf1));
6483   PetscCall(PetscMalloc1(nsend, &iremote));
6484   PetscCall(PetscMalloc1(nsend, &offsets));
6485   for (k = 0; k < nsend; k++) {
6486     iremote[k].rank  = sendto[k];
6487     iremote[k].index = 0;
6488     nleaves2 += nentries[k];
6489     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6490   }
6491   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6492   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6493   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6494   PetscCall(PetscSFDestroy(&sf1));
6495   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem);
6496 
6497   /* Build 2nd SF to send remote COOs to their owner */
6498   PetscSF sf2;
6499   nroots  = nroots2;
6500   nleaves = nleaves2;
6501   PetscCall(PetscSFCreate(comm, &sf2));
6502   PetscCall(PetscSFSetFromOptions(sf2));
6503   PetscCall(PetscMalloc1(nleaves, &iremote));
6504   p = 0;
6505   for (k = 0; k < nsend; k++) {
6506     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6507     for (q = 0; q < nentries[k]; q++, p++) {
6508       iremote[p].rank  = sendto[k];
6509       iremote[p].index = offsets[k] + q;
6510     }
6511   }
6512   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6513 
6514   /* Send the remote COOs to their owner */
6515   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6516   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6517   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6518   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6519   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE));
6520   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6521   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE));
6522 
6523   PetscCall(PetscFree(offsets));
6524   PetscCall(PetscFree2(sendto, nentries));
6525 
6526   /* Sort received COOs by row along with the permutation array     */
6527   for (k = 0; k < n2; k++) perm2[k] = k;
6528   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6529 
6530   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6531   PetscCount *Cperm1;
6532   PetscCall(PetscMalloc1(nleaves, &Cperm1));
6533   PetscCall(PetscArraycpy(Cperm1, perm1 + rem, nleaves));
6534 
6535   /* Support for HYPRE matrices, kind of a hack.
6536      Swap min column with diagonal so that diagonal values will go first */
6537   PetscBool   hypre;
6538   const char *name;
6539   PetscCall(PetscObjectGetName((PetscObject)mat, &name));
6540   PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", name, &hypre));
6541   if (hypre) {
6542     PetscInt *minj;
6543     PetscBT   hasdiag;
6544 
6545     PetscCall(PetscBTCreate(m, &hasdiag));
6546     PetscCall(PetscMalloc1(m, &minj));
6547     for (k = 0; k < m; k++) minj[k] = PETSC_MAX_INT;
6548     for (k = i1start; k < rem; k++) {
6549       if (j1[k] < cstart || j1[k] >= cend) continue;
6550       const PetscInt rindex = i1[k] - rstart;
6551       if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6552       minj[rindex] = PetscMin(minj[rindex], j1[k]);
6553     }
6554     for (k = 0; k < n2; k++) {
6555       if (j2[k] < cstart || j2[k] >= cend) continue;
6556       const PetscInt rindex = i2[k] - rstart;
6557       if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6558       minj[rindex] = PetscMin(minj[rindex], j2[k]);
6559     }
6560     for (k = i1start; k < rem; k++) {
6561       const PetscInt rindex = i1[k] - rstart;
6562       if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6563       if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart);
6564       else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex];
6565     }
6566     for (k = 0; k < n2; k++) {
6567       const PetscInt rindex = i2[k] - rstart;
6568       if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6569       if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart);
6570       else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex];
6571     }
6572     PetscCall(PetscBTDestroy(&hasdiag));
6573     PetscCall(PetscFree(minj));
6574   }
6575 
6576   /* Split local COOs and received COOs into diag/offdiag portions */
6577   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6578   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1;
6579   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6580   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6581   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6582   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6583 
6584   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6585   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6586   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6587   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6588 
6589   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6590   PetscInt *Ai, *Bi;
6591   PetscInt *Aj, *Bj;
6592 
6593   PetscCall(PetscMalloc1(m + 1, &Ai));
6594   PetscCall(PetscMalloc1(m + 1, &Bi));
6595   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6596   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6597 
6598   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6599   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6600   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6601   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6602   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6603 
6604   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6605   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6606 
6607   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6608   /* expect nonzeros in A/B most likely have local contributing entries        */
6609   PetscInt    Annz = Ai[m];
6610   PetscInt    Bnnz = Bi[m];
6611   PetscCount *Ajmap1_new, *Bjmap1_new;
6612 
6613   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6614   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6615 
6616   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6617   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6618 
6619   PetscCall(PetscFree(Aimap1));
6620   PetscCall(PetscFree(Ajmap1));
6621   PetscCall(PetscFree(Bimap1));
6622   PetscCall(PetscFree(Bjmap1));
6623   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6624   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6625   PetscCall(PetscFree(perm1));
6626   PetscCall(PetscFree3(i2, j2, perm2));
6627 
6628   Ajmap1 = Ajmap1_new;
6629   Bjmap1 = Bjmap1_new;
6630 
6631   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6632   if (Annz < Annz1 + Annz2) {
6633     PetscInt *Aj_new;
6634     PetscCall(PetscMalloc1(Annz, &Aj_new));
6635     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6636     PetscCall(PetscFree(Aj));
6637     Aj = Aj_new;
6638   }
6639 
6640   if (Bnnz < Bnnz1 + Bnnz2) {
6641     PetscInt *Bj_new;
6642     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6643     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6644     PetscCall(PetscFree(Bj));
6645     Bj = Bj_new;
6646   }
6647 
6648   /* Create new submatrices for on-process and off-process coupling                  */
6649   PetscScalar     *Aa, *Ba;
6650   MatType          rtype;
6651   Mat_SeqAIJ      *a, *b;
6652   PetscObjectState state;
6653   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6654   PetscCall(PetscCalloc1(Bnnz, &Ba));
6655   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6656   if (cstart) {
6657     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6658   }
6659   PetscCall(MatDestroy(&mpiaij->A));
6660   PetscCall(MatDestroy(&mpiaij->B));
6661   PetscCall(MatGetRootType_Private(mat, &rtype));
6662   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6663   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6664   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6665   mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ
6666   state              = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate;
6667   PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
6668 
6669   a               = (Mat_SeqAIJ *)mpiaij->A->data;
6670   b               = (Mat_SeqAIJ *)mpiaij->B->data;
6671   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6672   a->free_a = b->free_a = PETSC_TRUE;
6673   a->free_ij = b->free_ij = PETSC_TRUE;
6674 
6675   /* conversion must happen AFTER multiply setup */
6676   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6677   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6678   PetscCall(VecDestroy(&mpiaij->lvec));
6679   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6680 
6681   // Put the COO struct in a container and then attach that to the matrix
6682   PetscCall(PetscMalloc1(1, &coo));
6683   coo->n       = coo_n;
6684   coo->sf      = sf2;
6685   coo->sendlen = nleaves;
6686   coo->recvlen = nroots;
6687   coo->Annz    = Annz;
6688   coo->Bnnz    = Bnnz;
6689   coo->Annz2   = Annz2;
6690   coo->Bnnz2   = Bnnz2;
6691   coo->Atot1   = Atot1;
6692   coo->Atot2   = Atot2;
6693   coo->Btot1   = Btot1;
6694   coo->Btot2   = Btot2;
6695   coo->Ajmap1  = Ajmap1;
6696   coo->Aperm1  = Aperm1;
6697   coo->Bjmap1  = Bjmap1;
6698   coo->Bperm1  = Bperm1;
6699   coo->Aimap2  = Aimap2;
6700   coo->Ajmap2  = Ajmap2;
6701   coo->Aperm2  = Aperm2;
6702   coo->Bimap2  = Bimap2;
6703   coo->Bjmap2  = Bjmap2;
6704   coo->Bperm2  = Bperm2;
6705   coo->Cperm1  = Cperm1;
6706   // Allocate in preallocation. If not used, it has zero cost on host
6707   PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
6708   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
6709   PetscCall(PetscContainerSetPointer(container, coo));
6710   PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ));
6711   PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
6712   PetscCall(PetscContainerDestroy(&container));
6713   PetscFunctionReturn(PETSC_SUCCESS);
6714 }
6715 
6716 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6717 {
6718   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6719   Mat                  A = mpiaij->A, B = mpiaij->B;
6720   PetscScalar         *Aa, *Ba;
6721   PetscScalar         *sendbuf, *recvbuf;
6722   const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
6723   const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
6724   const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
6725   const PetscCount    *Cperm1;
6726   PetscContainer       container;
6727   MatCOOStruct_MPIAIJ *coo;
6728 
6729   PetscFunctionBegin;
6730   PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
6731   PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix");
6732   PetscCall(PetscContainerGetPointer(container, (void **)&coo));
6733   sendbuf = coo->sendbuf;
6734   recvbuf = coo->recvbuf;
6735   Ajmap1  = coo->Ajmap1;
6736   Ajmap2  = coo->Ajmap2;
6737   Aimap2  = coo->Aimap2;
6738   Bjmap1  = coo->Bjmap1;
6739   Bjmap2  = coo->Bjmap2;
6740   Bimap2  = coo->Bimap2;
6741   Aperm1  = coo->Aperm1;
6742   Aperm2  = coo->Aperm2;
6743   Bperm1  = coo->Bperm1;
6744   Bperm2  = coo->Bperm2;
6745   Cperm1  = coo->Cperm1;
6746 
6747   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6748   PetscCall(MatSeqAIJGetArray(B, &Ba));
6749 
6750   /* Pack entries to be sent to remote */
6751   for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6752 
6753   /* Send remote entries to their owner and overlap the communication with local computation */
6754   PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6755   /* Add local entries to A and B */
6756   for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6757     PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
6758     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6759     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6760   }
6761   for (PetscCount i = 0; i < coo->Bnnz; i++) {
6762     PetscScalar sum = 0.0;
6763     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6764     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6765   }
6766   PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6767 
6768   /* Add received remote entries to A and B */
6769   for (PetscCount i = 0; i < coo->Annz2; i++) {
6770     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6771   }
6772   for (PetscCount i = 0; i < coo->Bnnz2; i++) {
6773     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6774   }
6775   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6776   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6777   PetscFunctionReturn(PETSC_SUCCESS);
6778 }
6779 
6780 /*MC
6781    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6782 
6783    Options Database Keys:
6784 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6785 
6786    Level: beginner
6787 
6788    Notes:
6789    `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values,
6790     in this case the values associated with the rows and columns one passes in are set to zero
6791     in the matrix
6792 
6793     `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no
6794     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
6795 
6796 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6797 M*/
6798 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6799 {
6800   Mat_MPIAIJ *b;
6801   PetscMPIInt size;
6802 
6803   PetscFunctionBegin;
6804   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
6805 
6806   PetscCall(PetscNew(&b));
6807   B->data       = (void *)b;
6808   B->ops[0]     = MatOps_Values;
6809   B->assembled  = PETSC_FALSE;
6810   B->insertmode = NOT_SET_VALUES;
6811   b->size       = size;
6812 
6813   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
6814 
6815   /* build cache for off array entries formed */
6816   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
6817 
6818   b->donotstash  = PETSC_FALSE;
6819   b->colmap      = NULL;
6820   b->garray      = NULL;
6821   b->roworiented = PETSC_TRUE;
6822 
6823   /* stuff used for matrix vector multiply */
6824   b->lvec  = NULL;
6825   b->Mvctx = NULL;
6826 
6827   /* stuff for MatGetRow() */
6828   b->rowindices   = NULL;
6829   b->rowvalues    = NULL;
6830   b->getrowactive = PETSC_FALSE;
6831 
6832   /* flexible pointer used in CUSPARSE classes */
6833   b->spptr = NULL;
6834 
6835   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
6836   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
6837   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
6838   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
6839   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
6840   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
6841   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
6842   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
6843   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
6844   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
6845 #if defined(PETSC_HAVE_CUDA)
6846   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
6847 #endif
6848 #if defined(PETSC_HAVE_HIP)
6849   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
6850 #endif
6851 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6852   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
6853 #endif
6854 #if defined(PETSC_HAVE_MKL_SPARSE)
6855   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
6856 #endif
6857   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
6858   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
6859   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
6860   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
6861 #if defined(PETSC_HAVE_ELEMENTAL)
6862   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
6863 #endif
6864 #if defined(PETSC_HAVE_SCALAPACK)
6865   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
6866 #endif
6867   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
6868   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
6869 #if defined(PETSC_HAVE_HYPRE)
6870   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
6871   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
6872 #endif
6873   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
6874   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
6875   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
6876   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
6877   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
6878   PetscFunctionReturn(PETSC_SUCCESS);
6879 }
6880 
6881 /*@C
6882   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6883   and "off-diagonal" part of the matrix in CSR format.
6884 
6885   Collective
6886 
6887   Input Parameters:
6888 + comm - MPI communicator
6889 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
6890 . n    - This value should be the same as the local size used in creating the
6891        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
6892        calculated if `N` is given) For square matrices `n` is almost always `m`.
6893 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
6894 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
6895 . i    - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6896 . j    - column indices, which must be local, i.e., based off the start column of the diagonal portion
6897 . a    - matrix values
6898 . oi   - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6899 . oj   - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
6900 - oa   - matrix values
6901 
6902   Output Parameter:
6903 . mat - the matrix
6904 
6905   Level: advanced
6906 
6907   Notes:
6908   The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6909   must free the arrays once the matrix has been destroyed and not before.
6910 
6911   The `i` and `j` indices are 0 based
6912 
6913   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix
6914 
6915   This sets local rows and cannot be used to set off-processor values.
6916 
6917   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6918   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6919   not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6920   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6921   keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6922   communication if it is known that only local entries will be set.
6923 
6924 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6925           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6926 @*/
6927 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
6928 {
6929   Mat_MPIAIJ *maij;
6930 
6931   PetscFunctionBegin;
6932   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
6933   PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
6934   PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
6935   PetscCall(MatCreate(comm, mat));
6936   PetscCall(MatSetSizes(*mat, m, n, M, N));
6937   PetscCall(MatSetType(*mat, MATMPIAIJ));
6938   maij = (Mat_MPIAIJ *)(*mat)->data;
6939 
6940   (*mat)->preallocated = PETSC_TRUE;
6941 
6942   PetscCall(PetscLayoutSetUp((*mat)->rmap));
6943   PetscCall(PetscLayoutSetUp((*mat)->cmap));
6944 
6945   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
6946   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));
6947 
6948   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
6949   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
6950   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
6951   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
6952   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
6953   PetscFunctionReturn(PETSC_SUCCESS);
6954 }
6955 
6956 typedef struct {
6957   Mat       *mp;    /* intermediate products */
6958   PetscBool *mptmp; /* is the intermediate product temporary ? */
6959   PetscInt   cp;    /* number of intermediate products */
6960 
6961   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
6962   PetscInt    *startsj_s, *startsj_r;
6963   PetscScalar *bufa;
6964   Mat          P_oth;
6965 
6966   /* may take advantage of merging product->B */
6967   Mat Bloc; /* B-local by merging diag and off-diag */
6968 
6969   /* cusparse does not have support to split between symbolic and numeric phases.
6970      When api_user is true, we don't need to update the numerical values
6971      of the temporary storage */
6972   PetscBool reusesym;
6973 
6974   /* support for COO values insertion */
6975   PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
6976   PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
6977   PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
6978   PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
6979   PetscSF      sf;            /* used for non-local values insertion and memory malloc */
6980   PetscMemType mtype;
6981 
6982   /* customization */
6983   PetscBool abmerge;
6984   PetscBool P_oth_bind;
6985 } MatMatMPIAIJBACKEND;
6986 
6987 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6988 {
6989   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
6990   PetscInt             i;
6991 
6992   PetscFunctionBegin;
6993   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
6994   PetscCall(PetscFree(mmdata->bufa));
6995   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
6996   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
6997   PetscCall(MatDestroy(&mmdata->P_oth));
6998   PetscCall(MatDestroy(&mmdata->Bloc));
6999   PetscCall(PetscSFDestroy(&mmdata->sf));
7000   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
7001   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
7002   PetscCall(PetscFree(mmdata->own[0]));
7003   PetscCall(PetscFree(mmdata->own));
7004   PetscCall(PetscFree(mmdata->off[0]));
7005   PetscCall(PetscFree(mmdata->off));
7006   PetscCall(PetscFree(mmdata));
7007   PetscFunctionReturn(PETSC_SUCCESS);
7008 }
7009 
7010 /* Copy selected n entries with indices in idx[] of A to v[].
7011    If idx is NULL, copy the whole data array of A to v[]
7012  */
7013 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
7014 {
7015   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
7016 
7017   PetscFunctionBegin;
7018   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7019   if (f) {
7020     PetscCall((*f)(A, n, idx, v));
7021   } else {
7022     const PetscScalar *vv;
7023 
7024     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7025     if (n && idx) {
7026       PetscScalar    *w  = v;
7027       const PetscInt *oi = idx;
7028       PetscInt        j;
7029 
7030       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7031     } else {
7032       PetscCall(PetscArraycpy(v, vv, n));
7033     }
7034     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7035   }
7036   PetscFunctionReturn(PETSC_SUCCESS);
7037 }
7038 
7039 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
7040 {
7041   MatMatMPIAIJBACKEND *mmdata;
7042   PetscInt             i, n_d, n_o;
7043 
7044   PetscFunctionBegin;
7045   MatCheckProduct(C, 1);
7046   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
7047   mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
7048   if (!mmdata->reusesym) { /* update temporary matrices */
7049     if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7050     if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
7051   }
7052   mmdata->reusesym = PETSC_FALSE;
7053 
7054   for (i = 0; i < mmdata->cp; i++) {
7055     PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
7056     PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
7057   }
7058   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
7059     PetscInt noff = mmdata->off[i + 1] - mmdata->off[i];
7060 
7061     if (mmdata->mptmp[i]) continue;
7062     if (noff) {
7063       PetscInt nown = mmdata->own[i + 1] - mmdata->own[i];
7064 
7065       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
7066       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
7067       n_o += noff;
7068       n_d += nown;
7069     } else {
7070       Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;
7071 
7072       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
7073       n_d += mm->nz;
7074     }
7075   }
7076   if (mmdata->hasoffproc) { /* offprocess insertion */
7077     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7078     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7079   }
7080   PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
7081   PetscFunctionReturn(PETSC_SUCCESS);
7082 }
7083 
7084 /* Support for Pt * A, A * P, or Pt * A * P */
7085 #define MAX_NUMBER_INTERMEDIATE 4
7086 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
7087 {
7088   Mat_Product           *product = C->product;
7089   Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
7090   Mat_MPIAIJ            *a, *p;
7091   MatMatMPIAIJBACKEND   *mmdata;
7092   ISLocalToGlobalMapping P_oth_l2g = NULL;
7093   IS                     glob      = NULL;
7094   const char            *prefix;
7095   char                   pprefix[256];
7096   const PetscInt        *globidx, *P_oth_idx;
7097   PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
7098   PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
7099   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
7100                                                                                          /* type-0: consecutive, start from 0; type-1: consecutive with */
7101                                                                                          /* a base offset; type-2: sparse with a local to global map table */
7102   const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */
7103 
7104   MatProductType ptype;
7105   PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
7106   PetscMPIInt    size;
7107 
7108   PetscFunctionBegin;
7109   MatCheckProduct(C, 1);
7110   PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
7111   ptype = product->type;
7112   if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
7113     ptype                                          = MATPRODUCT_AB;
7114     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
7115   }
7116   switch (ptype) {
7117   case MATPRODUCT_AB:
7118     A          = product->A;
7119     P          = product->B;
7120     m          = A->rmap->n;
7121     n          = P->cmap->n;
7122     M          = A->rmap->N;
7123     N          = P->cmap->N;
7124     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
7125     break;
7126   case MATPRODUCT_AtB:
7127     P          = product->A;
7128     A          = product->B;
7129     m          = P->cmap->n;
7130     n          = A->cmap->n;
7131     M          = P->cmap->N;
7132     N          = A->cmap->N;
7133     hasoffproc = PETSC_TRUE;
7134     break;
7135   case MATPRODUCT_PtAP:
7136     A          = product->A;
7137     P          = product->B;
7138     m          = P->cmap->n;
7139     n          = P->cmap->n;
7140     M          = P->cmap->N;
7141     N          = P->cmap->N;
7142     hasoffproc = PETSC_TRUE;
7143     break;
7144   default:
7145     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7146   }
7147   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
7148   if (size == 1) hasoffproc = PETSC_FALSE;
7149 
7150   /* defaults */
7151   for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
7152     mp[i]    = NULL;
7153     mptmp[i] = PETSC_FALSE;
7154     rmapt[i] = -1;
7155     cmapt[i] = -1;
7156     rmapa[i] = NULL;
7157     cmapa[i] = NULL;
7158   }
7159 
7160   /* customization */
7161   PetscCall(PetscNew(&mmdata));
7162   mmdata->reusesym = product->api_user;
7163   if (ptype == MATPRODUCT_AB) {
7164     if (product->api_user) {
7165       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
7166       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7167       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7168       PetscOptionsEnd();
7169     } else {
7170       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
7171       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7172       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7173       PetscOptionsEnd();
7174     }
7175   } else if (ptype == MATPRODUCT_PtAP) {
7176     if (product->api_user) {
7177       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
7178       PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7179       PetscOptionsEnd();
7180     } else {
7181       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
7182       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7183       PetscOptionsEnd();
7184     }
7185   }
7186   a = (Mat_MPIAIJ *)A->data;
7187   p = (Mat_MPIAIJ *)P->data;
7188   PetscCall(MatSetSizes(C, m, n, M, N));
7189   PetscCall(PetscLayoutSetUp(C->rmap));
7190   PetscCall(PetscLayoutSetUp(C->cmap));
7191   PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
7192   PetscCall(MatGetOptionsPrefix(C, &prefix));
7193 
7194   cp = 0;
7195   switch (ptype) {
7196   case MATPRODUCT_AB: /* A * P */
7197     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7198 
7199     /* A_diag * P_local (merged or not) */
7200     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7201       /* P is product->B */
7202       PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7203       PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7204       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7205       PetscCall(MatProductSetFill(mp[cp], product->fill));
7206       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7207       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7208       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7209       mp[cp]->product->api_user = product->api_user;
7210       PetscCall(MatProductSetFromOptions(mp[cp]));
7211       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7212       PetscCall(ISGetIndices(glob, &globidx));
7213       rmapt[cp] = 1;
7214       cmapt[cp] = 2;
7215       cmapa[cp] = globidx;
7216       mptmp[cp] = PETSC_FALSE;
7217       cp++;
7218     } else { /* A_diag * P_diag and A_diag * P_off */
7219       PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
7220       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7221       PetscCall(MatProductSetFill(mp[cp], product->fill));
7222       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7223       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7224       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7225       mp[cp]->product->api_user = product->api_user;
7226       PetscCall(MatProductSetFromOptions(mp[cp]));
7227       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7228       rmapt[cp] = 1;
7229       cmapt[cp] = 1;
7230       mptmp[cp] = PETSC_FALSE;
7231       cp++;
7232       PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
7233       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7234       PetscCall(MatProductSetFill(mp[cp], product->fill));
7235       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7236       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7237       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7238       mp[cp]->product->api_user = product->api_user;
7239       PetscCall(MatProductSetFromOptions(mp[cp]));
7240       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7241       rmapt[cp] = 1;
7242       cmapt[cp] = 2;
7243       cmapa[cp] = p->garray;
7244       mptmp[cp] = PETSC_FALSE;
7245       cp++;
7246     }
7247 
7248     /* A_off * P_other */
7249     if (mmdata->P_oth) {
7250       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
7251       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7252       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
7253       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7254       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7255       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7256       PetscCall(MatProductSetFill(mp[cp], product->fill));
7257       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7258       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7259       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7260       mp[cp]->product->api_user = product->api_user;
7261       PetscCall(MatProductSetFromOptions(mp[cp]));
7262       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7263       rmapt[cp] = 1;
7264       cmapt[cp] = 2;
7265       cmapa[cp] = P_oth_idx;
7266       mptmp[cp] = PETSC_FALSE;
7267       cp++;
7268     }
7269     break;
7270 
7271   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7272     /* A is product->B */
7273     PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7274     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7275       PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
7276       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7277       PetscCall(MatProductSetFill(mp[cp], product->fill));
7278       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7279       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7280       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7281       mp[cp]->product->api_user = product->api_user;
7282       PetscCall(MatProductSetFromOptions(mp[cp]));
7283       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7284       PetscCall(ISGetIndices(glob, &globidx));
7285       rmapt[cp] = 2;
7286       rmapa[cp] = globidx;
7287       cmapt[cp] = 2;
7288       cmapa[cp] = globidx;
7289       mptmp[cp] = PETSC_FALSE;
7290       cp++;
7291     } else {
7292       PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
7293       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7294       PetscCall(MatProductSetFill(mp[cp], product->fill));
7295       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7296       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7297       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7298       mp[cp]->product->api_user = product->api_user;
7299       PetscCall(MatProductSetFromOptions(mp[cp]));
7300       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7301       PetscCall(ISGetIndices(glob, &globidx));
7302       rmapt[cp] = 1;
7303       cmapt[cp] = 2;
7304       cmapa[cp] = globidx;
7305       mptmp[cp] = PETSC_FALSE;
7306       cp++;
7307       PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
7308       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7309       PetscCall(MatProductSetFill(mp[cp], product->fill));
7310       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7311       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7312       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7313       mp[cp]->product->api_user = product->api_user;
7314       PetscCall(MatProductSetFromOptions(mp[cp]));
7315       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7316       rmapt[cp] = 2;
7317       rmapa[cp] = p->garray;
7318       cmapt[cp] = 2;
7319       cmapa[cp] = globidx;
7320       mptmp[cp] = PETSC_FALSE;
7321       cp++;
7322     }
7323     break;
7324   case MATPRODUCT_PtAP:
7325     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7326     /* P is product->B */
7327     PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7328     PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7329     PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
7330     PetscCall(MatProductSetFill(mp[cp], product->fill));
7331     PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7332     PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7333     PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7334     mp[cp]->product->api_user = product->api_user;
7335     PetscCall(MatProductSetFromOptions(mp[cp]));
7336     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7337     PetscCall(ISGetIndices(glob, &globidx));
7338     rmapt[cp] = 2;
7339     rmapa[cp] = globidx;
7340     cmapt[cp] = 2;
7341     cmapa[cp] = globidx;
7342     mptmp[cp] = PETSC_FALSE;
7343     cp++;
7344     if (mmdata->P_oth) {
7345       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
7346       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7347       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
7348       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7349       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7350       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7351       PetscCall(MatProductSetFill(mp[cp], product->fill));
7352       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7353       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7354       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7355       mp[cp]->product->api_user = product->api_user;
7356       PetscCall(MatProductSetFromOptions(mp[cp]));
7357       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7358       mptmp[cp] = PETSC_TRUE;
7359       cp++;
7360       PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
7361       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7362       PetscCall(MatProductSetFill(mp[cp], product->fill));
7363       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7364       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7365       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7366       mp[cp]->product->api_user = product->api_user;
7367       PetscCall(MatProductSetFromOptions(mp[cp]));
7368       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7369       rmapt[cp] = 2;
7370       rmapa[cp] = globidx;
7371       cmapt[cp] = 2;
7372       cmapa[cp] = P_oth_idx;
7373       mptmp[cp] = PETSC_FALSE;
7374       cp++;
7375     }
7376     break;
7377   default:
7378     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7379   }
7380   /* sanity check */
7381   if (size > 1)
7382     for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);
7383 
7384   PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
7385   for (i = 0; i < cp; i++) {
7386     mmdata->mp[i]    = mp[i];
7387     mmdata->mptmp[i] = mptmp[i];
7388   }
7389   mmdata->cp             = cp;
7390   C->product->data       = mmdata;
7391   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7392   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7393 
7394   /* memory type */
7395   mmdata->mtype = PETSC_MEMTYPE_HOST;
7396   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
7397   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
7398   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
7399   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7400   else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
7401   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7402 
7403   /* prepare coo coordinates for values insertion */
7404 
7405   /* count total nonzeros of those intermediate seqaij Mats
7406     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7407     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7408     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7409   */
7410   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7411     Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
7412     if (mptmp[cp]) continue;
7413     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
7414       const PetscInt *rmap = rmapa[cp];
7415       const PetscInt  mr   = mp[cp]->rmap->n;
7416       const PetscInt  rs   = C->rmap->rstart;
7417       const PetscInt  re   = C->rmap->rend;
7418       const PetscInt *ii   = mm->i;
7419       for (i = 0; i < mr; i++) {
7420         const PetscInt gr = rmap[i];
7421         const PetscInt nz = ii[i + 1] - ii[i];
7422         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7423         else ncoo_oown += nz;                  /* this row is local */
7424       }
7425     } else ncoo_d += mm->nz;
7426   }
7427 
7428   /*
7429     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7430 
7431     ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.
7432 
7433     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].
7434 
7435     off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
7436     own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
7437     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7438 
7439     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7440     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
7441   */
7442   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
7443   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));
7444 
7445   /* gather (i,j) of nonzeros inserted by remote procs */
7446   if (hasoffproc) {
7447     PetscSF  msf;
7448     PetscInt ncoo2, *coo_i2, *coo_j2;
7449 
7450     PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
7451     PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
7452     PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */
7453 
7454     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7455       Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
7456       PetscInt   *idxoff = mmdata->off[cp];
7457       PetscInt   *idxown = mmdata->own[cp];
7458       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7459         const PetscInt *rmap = rmapa[cp];
7460         const PetscInt *cmap = cmapa[cp];
7461         const PetscInt *ii   = mm->i;
7462         PetscInt       *coi  = coo_i + ncoo_o;
7463         PetscInt       *coj  = coo_j + ncoo_o;
7464         const PetscInt  mr   = mp[cp]->rmap->n;
7465         const PetscInt  rs   = C->rmap->rstart;
7466         const PetscInt  re   = C->rmap->rend;
7467         const PetscInt  cs   = C->cmap->rstart;
7468         for (i = 0; i < mr; i++) {
7469           const PetscInt *jj = mm->j + ii[i];
7470           const PetscInt  gr = rmap[i];
7471           const PetscInt  nz = ii[i + 1] - ii[i];
7472           if (gr < rs || gr >= re) { /* this is an offproc row */
7473             for (j = ii[i]; j < ii[i + 1]; j++) {
7474               *coi++    = gr;
7475               *idxoff++ = j;
7476             }
7477             if (!cmapt[cp]) { /* already global */
7478               for (j = 0; j < nz; j++) *coj++ = jj[j];
7479             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7480               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7481             } else { /* offdiag */
7482               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7483             }
7484             ncoo_o += nz;
7485           } else { /* this is a local row */
7486             for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
7487           }
7488         }
7489       }
7490       mmdata->off[cp + 1] = idxoff;
7491       mmdata->own[cp + 1] = idxown;
7492     }
7493 
7494     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7495     PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
7496     PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
7497     PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
7498     ncoo = ncoo_d + ncoo_oown + ncoo2;
7499     PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
7500     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7501     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
7502     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7503     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7504     PetscCall(PetscFree2(coo_i, coo_j));
7505     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7506     PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
7507     coo_i = coo_i2;
7508     coo_j = coo_j2;
7509   } else { /* no offproc values insertion */
7510     ncoo = ncoo_d;
7511     PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));
7512 
7513     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7514     PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
7515     PetscCall(PetscSFSetUp(mmdata->sf));
7516   }
7517   mmdata->hasoffproc = hasoffproc;
7518 
7519   /* gather (i,j) of nonzeros inserted locally */
7520   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7521     Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
7522     PetscInt       *coi  = coo_i + ncoo_d;
7523     PetscInt       *coj  = coo_j + ncoo_d;
7524     const PetscInt *jj   = mm->j;
7525     const PetscInt *ii   = mm->i;
7526     const PetscInt *cmap = cmapa[cp];
7527     const PetscInt *rmap = rmapa[cp];
7528     const PetscInt  mr   = mp[cp]->rmap->n;
7529     const PetscInt  rs   = C->rmap->rstart;
7530     const PetscInt  re   = C->rmap->rend;
7531     const PetscInt  cs   = C->cmap->rstart;
7532 
7533     if (mptmp[cp]) continue;
7534     if (rmapt[cp] == 1) { /* consecutive rows */
7535       /* fill coo_i */
7536       for (i = 0; i < mr; i++) {
7537         const PetscInt gr = i + rs;
7538         for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
7539       }
7540       /* fill coo_j */
7541       if (!cmapt[cp]) { /* type-0, already global */
7542         PetscCall(PetscArraycpy(coj, jj, mm->nz));
7543       } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
7544         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7545       } else {                                            /* type-2, local to global for sparse columns */
7546         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7547       }
7548       ncoo_d += mm->nz;
7549     } else if (rmapt[cp] == 2) { /* sparse rows */
7550       for (i = 0; i < mr; i++) {
7551         const PetscInt *jj = mm->j + ii[i];
7552         const PetscInt  gr = rmap[i];
7553         const PetscInt  nz = ii[i + 1] - ii[i];
7554         if (gr >= rs && gr < re) { /* local rows */
7555           for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
7556           if (!cmapt[cp]) { /* type-0, already global */
7557             for (j = 0; j < nz; j++) *coj++ = jj[j];
7558           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7559             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7560           } else { /* type-2, local to global for sparse columns */
7561             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7562           }
7563           ncoo_d += nz;
7564         }
7565       }
7566     }
7567   }
7568   if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
7569   PetscCall(ISDestroy(&glob));
7570   if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
7571   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7572   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7573   PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));
7574 
7575   /* preallocate with COO data */
7576   PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
7577   PetscCall(PetscFree2(coo_i, coo_j));
7578   PetscFunctionReturn(PETSC_SUCCESS);
7579 }
7580 
7581 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7582 {
7583   Mat_Product *product = mat->product;
7584 #if defined(PETSC_HAVE_DEVICE)
7585   PetscBool match  = PETSC_FALSE;
7586   PetscBool usecpu = PETSC_FALSE;
7587 #else
7588   PetscBool match = PETSC_TRUE;
7589 #endif
7590 
7591   PetscFunctionBegin;
7592   MatCheckProduct(mat, 1);
7593 #if defined(PETSC_HAVE_DEVICE)
7594   if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
7595   if (match) { /* we can always fallback to the CPU if requested */
7596     switch (product->type) {
7597     case MATPRODUCT_AB:
7598       if (product->api_user) {
7599         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
7600         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7601         PetscOptionsEnd();
7602       } else {
7603         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
7604         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7605         PetscOptionsEnd();
7606       }
7607       break;
7608     case MATPRODUCT_AtB:
7609       if (product->api_user) {
7610         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
7611         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7612         PetscOptionsEnd();
7613       } else {
7614         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
7615         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7616         PetscOptionsEnd();
7617       }
7618       break;
7619     case MATPRODUCT_PtAP:
7620       if (product->api_user) {
7621         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
7622         PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7623         PetscOptionsEnd();
7624       } else {
7625         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
7626         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7627         PetscOptionsEnd();
7628       }
7629       break;
7630     default:
7631       break;
7632     }
7633     match = (PetscBool)!usecpu;
7634   }
7635 #endif
7636   if (match) {
7637     switch (product->type) {
7638     case MATPRODUCT_AB:
7639     case MATPRODUCT_AtB:
7640     case MATPRODUCT_PtAP:
7641       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7642       break;
7643     default:
7644       break;
7645     }
7646   }
7647   /* fallback to MPIAIJ ops */
7648   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7649   PetscFunctionReturn(PETSC_SUCCESS);
7650 }
7651 
7652 /*
7653    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7654 
7655    n - the number of block indices in cc[]
7656    cc - the block indices (must be large enough to contain the indices)
7657 */
7658 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7659 {
7660   PetscInt        cnt = -1, nidx, j;
7661   const PetscInt *idx;
7662 
7663   PetscFunctionBegin;
7664   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7665   if (nidx) {
7666     cnt     = 0;
7667     cc[cnt] = idx[0] / bs;
7668     for (j = 1; j < nidx; j++) {
7669       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7670     }
7671   }
7672   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7673   *n = cnt + 1;
7674   PetscFunctionReturn(PETSC_SUCCESS);
7675 }
7676 
7677 /*
7678     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7679 
7680     ncollapsed - the number of block indices
7681     collapsed - the block indices (must be large enough to contain the indices)
7682 */
7683 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7684 {
7685   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7686 
7687   PetscFunctionBegin;
7688   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7689   for (i = start + 1; i < start + bs; i++) {
7690     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7691     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7692     cprevtmp = cprev;
7693     cprev    = merged;
7694     merged   = cprevtmp;
7695   }
7696   *ncollapsed = nprev;
7697   if (collapsed) *collapsed = cprev;
7698   PetscFunctionReturn(PETSC_SUCCESS);
7699 }
7700 
7701 /*
7702  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7703 
7704  Input Parameter:
7705  . Amat - matrix
7706  - symmetrize - make the result symmetric
7707  + scale - scale with diagonal
7708 
7709  Output Parameter:
7710  . a_Gmat - output scalar graph >= 0
7711 
7712 */
7713 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, Mat *a_Gmat)
7714 {
7715   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7716   MPI_Comm  comm;
7717   Mat       Gmat;
7718   PetscBool ismpiaij, isseqaij;
7719   Mat       a, b, c;
7720   MatType   jtype;
7721 
7722   PetscFunctionBegin;
7723   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7724   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7725   PetscCall(MatGetSize(Amat, &MM, &NN));
7726   PetscCall(MatGetBlockSize(Amat, &bs));
7727   nloc = (Iend - Istart) / bs;
7728 
7729   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7730   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7731   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7732 
7733   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7734   /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
7735      implementation */
7736   if (bs > 1) {
7737     PetscCall(MatGetType(Amat, &jtype));
7738     PetscCall(MatCreate(comm, &Gmat));
7739     PetscCall(MatSetType(Gmat, jtype));
7740     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7741     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7742     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7743       PetscInt  *d_nnz, *o_nnz;
7744       MatScalar *aa, val, *AA;
7745       PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
7746       if (isseqaij) {
7747         a = Amat;
7748         b = NULL;
7749       } else {
7750         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7751         a             = d->A;
7752         b             = d->B;
7753       }
7754       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7755       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7756       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7757         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
7758         const PetscInt *cols1, *cols2;
7759         for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
7760           PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
7761           nnz[brow / bs] = nc2 / bs;
7762           if (nc2 % bs) ok = 0;
7763           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
7764           for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
7765             PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
7766             if (nc1 != nc2) ok = 0;
7767             else {
7768               for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
7769                 if (cols1[jj] != cols2[jj]) ok = 0;
7770                 if (cols1[jj] % bs != jj % bs) ok = 0;
7771               }
7772             }
7773             PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
7774           }
7775           PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
7776           if (!ok) {
7777             PetscCall(PetscFree2(d_nnz, o_nnz));
7778             PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
7779             goto old_bs;
7780           }
7781         }
7782       }
7783       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7784       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7785       PetscCall(PetscFree2(d_nnz, o_nnz));
7786       PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
7787       // diag
7788       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
7789         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
7790         ai               = aseq->i;
7791         n                = ai[brow + 1] - ai[brow];
7792         aj               = aseq->j + ai[brow];
7793         for (int k = 0; k < n; k += bs) {        // block columns
7794           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
7795           val        = 0;
7796           for (int ii = 0; ii < bs; ii++) { // rows in block
7797             aa = aseq->a + ai[brow + ii] + k;
7798             for (int jj = 0; jj < bs; jj++) {         // columns in block
7799               val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7800             }
7801           }
7802           PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7803           AA[k / bs] = val;
7804         }
7805         grow = Istart / bs + brow / bs;
7806         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES));
7807       }
7808       // off-diag
7809       if (ismpiaij) {
7810         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
7811         const PetscScalar *vals;
7812         const PetscInt    *cols, *garray = aij->garray;
7813         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
7814         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
7815           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
7816           for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
7817             PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
7818             AA[k / bs] = 0;
7819             AJ[cidx]   = garray[cols[k]] / bs;
7820           }
7821           nc = ncols / bs;
7822           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
7823           for (int ii = 0; ii < bs; ii++) { // rows in block
7824             PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7825             for (int k = 0; k < ncols; k += bs) {
7826               for (int jj = 0; jj < bs; jj++) { // cols in block
7827                 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7828                 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7829               }
7830             }
7831             PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7832           }
7833           grow = Istart / bs + brow / bs;
7834           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES));
7835         }
7836       }
7837       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7838       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7839       PetscCall(PetscFree2(AA, AJ));
7840     } else {
7841       const PetscScalar *vals;
7842       const PetscInt    *idx;
7843       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
7844     old_bs:
7845       /*
7846        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7847        */
7848       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
7849       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7850       if (isseqaij) {
7851         PetscInt max_d_nnz;
7852         /*
7853          Determine exact preallocation count for (sequential) scalar matrix
7854          */
7855         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
7856         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7857         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7858         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7859         PetscCall(PetscFree3(w0, w1, w2));
7860       } else if (ismpiaij) {
7861         Mat             Daij, Oaij;
7862         const PetscInt *garray;
7863         PetscInt        max_d_nnz;
7864         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
7865         /*
7866          Determine exact preallocation count for diagonal block portion of scalar matrix
7867          */
7868         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
7869         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7870         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7871         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7872         PetscCall(PetscFree3(w0, w1, w2));
7873         /*
7874          Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
7875          */
7876         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7877           o_nnz[jj] = 0;
7878           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
7879             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7880             o_nnz[jj] += ncols;
7881             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7882           }
7883           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
7884         }
7885       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
7886       /* get scalar copy (norms) of matrix */
7887       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7888       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7889       PetscCall(PetscFree2(d_nnz, o_nnz));
7890       for (Ii = Istart; Ii < Iend; Ii++) {
7891         PetscInt dest_row = Ii / bs;
7892         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
7893         for (jj = 0; jj < ncols; jj++) {
7894           PetscInt    dest_col = idx[jj] / bs;
7895           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
7896           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
7897         }
7898         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
7899       }
7900       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7901       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7902     }
7903   } else {
7904     if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
7905     else {
7906       Gmat = Amat;
7907       PetscCall(PetscObjectReference((PetscObject)Gmat));
7908     }
7909     if (isseqaij) {
7910       a = Gmat;
7911       b = NULL;
7912     } else {
7913       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
7914       a             = d->A;
7915       b             = d->B;
7916     }
7917     if (filter >= 0 || scale) {
7918       /* take absolute value of each entry */
7919       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7920         MatInfo      info;
7921         PetscScalar *avals;
7922         PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
7923         PetscCall(MatSeqAIJGetArray(c, &avals));
7924         for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
7925         PetscCall(MatSeqAIJRestoreArray(c, &avals));
7926       }
7927     }
7928   }
7929   if (symmetrize) {
7930     PetscBool isset, issym;
7931     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
7932     if (!isset || !issym) {
7933       Mat matTrans;
7934       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
7935       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
7936       PetscCall(MatDestroy(&matTrans));
7937     }
7938     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
7939   } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
7940   if (scale) {
7941     /* scale c for all diagonal values = 1 or -1 */
7942     Vec diag;
7943     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
7944     PetscCall(MatGetDiagonal(Gmat, diag));
7945     PetscCall(VecReciprocal(diag));
7946     PetscCall(VecSqrtAbs(diag));
7947     PetscCall(MatDiagonalScale(Gmat, diag, diag));
7948     PetscCall(VecDestroy(&diag));
7949   }
7950   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
7951 
7952   if (filter >= 0) {
7953     PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
7954     PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
7955   }
7956   *a_Gmat = Gmat;
7957   PetscFunctionReturn(PETSC_SUCCESS);
7958 }
7959 
7960 /*
7961     Special version for direct calls from Fortran
7962 */
7963 #include <petsc/private/fortranimpl.h>
7964 
7965 /* Change these macros so can be used in void function */
7966 /* Identical to PetscCallVoid, except it assigns to *_ierr */
7967 #undef PetscCall
7968 #define PetscCall(...) \
7969   do { \
7970     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
7971     if (PetscUnlikely(ierr_msv_mpiaij)) { \
7972       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
7973       return; \
7974     } \
7975   } while (0)
7976 
7977 #undef SETERRQ
7978 #define SETERRQ(comm, ierr, ...) \
7979   do { \
7980     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
7981     return; \
7982   } while (0)
7983 
7984 #if defined(PETSC_HAVE_FORTRAN_CAPS)
7985   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
7986 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
7987   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
7988 #else
7989 #endif
7990 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
7991 {
7992   Mat         mat = *mmat;
7993   PetscInt    m = *mm, n = *mn;
7994   InsertMode  addv = *maddv;
7995   Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
7996   PetscScalar value;
7997 
7998   MatCheckPreallocated(mat, 1);
7999   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
8000   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
8001   {
8002     PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
8003     PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
8004     PetscBool roworiented = aij->roworiented;
8005 
8006     /* Some Variables required in the macro */
8007     Mat         A     = aij->A;
8008     Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
8009     PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
8010     MatScalar  *aa;
8011     PetscBool   ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
8012     Mat         B                 = aij->B;
8013     Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
8014     PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
8015     MatScalar  *ba;
8016     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
8017      * cannot use "#if defined" inside a macro. */
8018     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
8019 
8020     PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
8021     PetscInt   nonew = a->nonew;
8022     MatScalar *ap1, *ap2;
8023 
8024     PetscFunctionBegin;
8025     PetscCall(MatSeqAIJGetArray(A, &aa));
8026     PetscCall(MatSeqAIJGetArray(B, &ba));
8027     for (i = 0; i < m; i++) {
8028       if (im[i] < 0) continue;
8029       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
8030       if (im[i] >= rstart && im[i] < rend) {
8031         row      = im[i] - rstart;
8032         lastcol1 = -1;
8033         rp1      = aj + ai[row];
8034         ap1      = aa + ai[row];
8035         rmax1    = aimax[row];
8036         nrow1    = ailen[row];
8037         low1     = 0;
8038         high1    = nrow1;
8039         lastcol2 = -1;
8040         rp2      = bj + bi[row];
8041         ap2      = ba + bi[row];
8042         rmax2    = bimax[row];
8043         nrow2    = bilen[row];
8044         low2     = 0;
8045         high2    = nrow2;
8046 
8047         for (j = 0; j < n; j++) {
8048           if (roworiented) value = v[i * n + j];
8049           else value = v[i + j * m];
8050           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
8051           if (in[j] >= cstart && in[j] < cend) {
8052             col = in[j] - cstart;
8053             MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
8054           } else if (in[j] < 0) continue;
8055           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
8056             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
8057           } else {
8058             if (mat->was_assembled) {
8059               if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
8060 #if defined(PETSC_USE_CTABLE)
8061               PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
8062               col--;
8063 #else
8064               col = aij->colmap[in[j]] - 1;
8065 #endif
8066               if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
8067                 PetscCall(MatDisAssemble_MPIAIJ(mat));
8068                 col = in[j];
8069                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
8070                 B        = aij->B;
8071                 b        = (Mat_SeqAIJ *)B->data;
8072                 bimax    = b->imax;
8073                 bi       = b->i;
8074                 bilen    = b->ilen;
8075                 bj       = b->j;
8076                 rp2      = bj + bi[row];
8077                 ap2      = ba + bi[row];
8078                 rmax2    = bimax[row];
8079                 nrow2    = bilen[row];
8080                 low2     = 0;
8081                 high2    = nrow2;
8082                 bm       = aij->B->rmap->n;
8083                 ba       = b->a;
8084                 inserted = PETSC_FALSE;
8085               }
8086             } else col = in[j];
8087             MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
8088           }
8089         }
8090       } else if (!aij->donotstash) {
8091         if (roworiented) {
8092           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8093         } else {
8094           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8095         }
8096       }
8097     }
8098     PetscCall(MatSeqAIJRestoreArray(A, &aa));
8099     PetscCall(MatSeqAIJRestoreArray(B, &ba));
8100   }
8101   PetscFunctionReturnVoid();
8102 }
8103 
8104 /* Undefining these here since they were redefined from their original definition above! No
8105  * other PETSc functions should be defined past this point, as it is impossible to recover the
8106  * original definitions */
8107 #undef PetscCall
8108 #undef SETERRQ
8109