xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision cac3c07dbc4e95423e22cb699bb64807a71d0bfe)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
10 {
11   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
12 
13   PetscFunctionBegin;
14   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
15   PetscCall(MatStashDestroy_Private(&mat->stash));
16   PetscCall(VecDestroy(&aij->diag));
17   PetscCall(MatDestroy(&aij->A));
18   PetscCall(MatDestroy(&aij->B));
19 #if defined(PETSC_USE_CTABLE)
20   PetscCall(PetscHMapIDestroy(&aij->colmap));
21 #else
22   PetscCall(PetscFree(aij->colmap));
23 #endif
24   PetscCall(PetscFree(aij->garray));
25   PetscCall(VecDestroy(&aij->lvec));
26   PetscCall(VecScatterDestroy(&aij->Mvctx));
27   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
28   PetscCall(PetscFree(aij->ld));
29 
30   PetscCall(PetscFree(mat->data));
31 
32   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
33   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
34 
35   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
36   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
37   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
38   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
39   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
40   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
41   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
42   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
43   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
44   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
45 #if defined(PETSC_HAVE_CUDA)
46   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
47 #endif
48 #if defined(PETSC_HAVE_HIP)
49   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
50 #endif
51 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
52   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
53 #endif
54   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
55 #if defined(PETSC_HAVE_ELEMENTAL)
56   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
57 #endif
58 #if defined(PETSC_HAVE_SCALAPACK)
59   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
60 #endif
61 #if defined(PETSC_HAVE_HYPRE)
62   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
63   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
64 #endif
65   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
66   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
69   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
71 #if defined(PETSC_HAVE_MKL_SPARSE)
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
73 #endif
74   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
75   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
76   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
77   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
78   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
79   PetscFunctionReturn(PETSC_SUCCESS);
80 }
81 
/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and  MatAssemblyEnd_MPI_Hash() */
/* TYPE/TYPE_AIJ are consumed by the included header to generate the AIJ
   variants of the hash-based assembly routines; they are undefined immediately
   afterwards to avoid leaking into the rest of this file */
#define TYPE AIJ
#define TYPE_AIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE
#undef TYPE_AIJ
88 
89 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
90 {
91   Mat B;
92 
93   PetscFunctionBegin;
94   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
95   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
96   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
97   PetscCall(MatDestroy(&B));
98   PetscFunctionReturn(PETSC_SUCCESS);
99 }
100 
101 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
102 {
103   Mat B;
104 
105   PetscFunctionBegin;
106   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
107   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
108   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
109   PetscFunctionReturn(PETSC_SUCCESS);
110 }
111 
112 /*MC
113    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
114 
   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
116    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
117   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
118   for communicators controlling multiple processes.  It is recommended that you call both of
119   the above preallocation routines for simplicity.
120 
121    Options Database Key:
122 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
123 
  Level: beginner

  Developer Note:
  Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`; the type also automatically switches over to use inodes when
  enough exist.
129 
130 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
131 M*/
132 
133 /*MC
134    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
135 
136    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
137    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
138    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
139   for communicators controlling multiple processes.  It is recommended that you call both of
140   the above preallocation routines for simplicity.
141 
142    Options Database Key:
143 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`
144 
145   Level: beginner
146 
147 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
148 M*/
149 
150 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
151 {
152   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
153 
154   PetscFunctionBegin;
155 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
156   A->boundtocpu = flg;
157 #endif
158   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
159   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
160 
161   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
162    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
163    * to differ from the parent matrix. */
164   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
165   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
166 
167   PetscFunctionReturn(PETSC_SUCCESS);
168 }
169 
170 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
171 {
172   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
173 
174   PetscFunctionBegin;
175   if (mat->A) {
176     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
177     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
178   }
179   PetscFunctionReturn(PETSC_SUCCESS);
180 }
181 
182 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
183 {
184   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
185   Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
186   Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
187   const PetscInt  *ia, *ib;
188   const MatScalar *aa, *bb, *aav, *bav;
189   PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
190   PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;
191 
192   PetscFunctionBegin;
193   *keptrows = NULL;
194 
195   ia = a->i;
196   ib = b->i;
197   PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
198   PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
199   for (i = 0; i < m; i++) {
200     na = ia[i + 1] - ia[i];
201     nb = ib[i + 1] - ib[i];
202     if (!na && !nb) {
203       cnt++;
204       goto ok1;
205     }
206     aa = aav + ia[i];
207     for (j = 0; j < na; j++) {
208       if (aa[j] != 0.0) goto ok1;
209     }
210     bb = PetscSafePointerPlusOffset(bav, ib[i]);
211     for (j = 0; j < nb; j++) {
212       if (bb[j] != 0.0) goto ok1;
213     }
214     cnt++;
215   ok1:;
216   }
217   PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
218   if (!n0rows) {
219     PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
220     PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
221     PetscFunctionReturn(PETSC_SUCCESS);
222   }
223   PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
224   cnt = 0;
225   for (i = 0; i < m; i++) {
226     na = ia[i + 1] - ia[i];
227     nb = ib[i + 1] - ib[i];
228     if (!na && !nb) continue;
229     aa = aav + ia[i];
230     for (j = 0; j < na; j++) {
231       if (aa[j] != 0.0) {
232         rows[cnt++] = rstart + i;
233         goto ok2;
234       }
235     }
236     bb = PetscSafePointerPlusOffset(bav, ib[i]);
237     for (j = 0; j < nb; j++) {
238       if (bb[j] != 0.0) {
239         rows[cnt++] = rstart + i;
240         goto ok2;
241       }
242     }
243   ok2:;
244   }
245   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
246   PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
247   PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
248   PetscFunctionReturn(PETSC_SUCCESS);
249 }
250 
251 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
252 {
253   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
254   PetscBool   cong;
255 
256   PetscFunctionBegin;
257   PetscCall(MatHasCongruentLayouts(Y, &cong));
258   if (Y->assembled && cong) {
259     PetscCall(MatDiagonalSet(aij->A, D, is));
260   } else {
261     PetscCall(MatDiagonalSet_Default(Y, D, is));
262   }
263   PetscFunctionReturn(PETSC_SUCCESS);
264 }
265 
266 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
267 {
268   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
269   PetscInt    i, rstart, nrows, *rows;
270 
271   PetscFunctionBegin;
272   *zrows = NULL;
273   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
274   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
275   for (i = 0; i < nrows; i++) rows[i] += rstart;
276   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
277   PetscFunctionReturn(PETSC_SUCCESS);
278 }
279 
/* Compute a per-column reduction (norm, sum, or mean of real/imaginary parts)
   over all rows of the parallel matrix; reductions[] must have length n
   (global number of columns) on every process. */
static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray; /* garray maps B's local columns to global column indices */
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  /* get/restore pairs with no other use: presumably these force any
     device-side values to be synchronized to the host arrays read below —
     TODO confirm against MatSeqAIJGetArrayRead() semantics */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    /* |a*a| equals |a|^2 also for complex scalars, so this accumulates squared magnitudes */
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  /* combine the per-process partial results: max for the infinity norm, sum otherwise */
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    /* the 2-norm accumulated squares; take the square root */
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    /* means divide the column sums by the global number of rows */
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
325 
326 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
327 {
328   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
329   IS              sis, gis;
330   const PetscInt *isis, *igis;
331   PetscInt        n, *iis, nsis, ngis, rstart, i;
332 
333   PetscFunctionBegin;
334   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
335   PetscCall(MatFindNonzeroRows(a->B, &gis));
336   PetscCall(ISGetSize(gis, &ngis));
337   PetscCall(ISGetSize(sis, &nsis));
338   PetscCall(ISGetIndices(sis, &isis));
339   PetscCall(ISGetIndices(gis, &igis));
340 
341   PetscCall(PetscMalloc1(ngis + nsis, &iis));
342   PetscCall(PetscArraycpy(iis, igis, ngis));
343   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
344   n = ngis + nsis;
345   PetscCall(PetscSortRemoveDupsInt(&n, iis));
346   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
347   for (i = 0; i < n; i++) iis[i] += rstart;
348   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
349 
350   PetscCall(ISRestoreIndices(sis, &isis));
351   PetscCall(ISRestoreIndices(gis, &igis));
352   PetscCall(ISDestroy(&sis));
353   PetscCall(ISDestroy(&gis));
354   PetscFunctionReturn(PETSC_SUCCESS);
355 }
356 
/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable
at a slightly higher hash table cost; without it it is not scalable
(each process stores an integer array of length equal to the global
number of columns) but access is fast.
*/
364 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
365 {
366   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
367   PetscInt    n   = aij->B->cmap->n, i;
368 
369   PetscFunctionBegin;
370   PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
371 #if defined(PETSC_USE_CTABLE)
372   PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
373   for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
374 #else
375   PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
376   for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
377 #endif
378   PetscFunctionReturn(PETSC_SUCCESS);
379 }
380 
/* Insert or add `value` at local position (row,col) of the diagonal block A.
   Relies on caller-scope variables prepared by MatSetValues_MPIAIJ(): rp1/ap1
   (the row's column indices and values), nrow1/rmax1 (used/allocated entries),
   low1/high1/lastcol1 (search window reused while sweeping one row), plus
   aimax/ai/aj/ailen/aa, nonew and ignorezeroentries.  orow/ocol are the global
   indices, used only in error messages. */
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    /* reuse the previous search window when columns arrive in increasing order */ \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    /* binary search down to a window of <= 5 entries, then scan linearly */ \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    /* entry not present: optionally drop zero values (never on the diagonal) */ \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    /* nonew == 1 means silently ignore new nonzero locations */ \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)
426 
/* Insert or add `value` at position (row,col) of the off-diagonal block B.
   Mirror of MatSetValues_SeqAIJ_A_Private() using the rp2/ap2/nrow2/rmax2/
   low2/high2/lastcol2 and bimax/bi/bj/bilen/ba caller-scope variables.  Note
   there is no `row != col` exclusion in the zero-drop test: a diagonal entry
   never lands in B. */
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    /* binary search down to a window of <= 5 entries, then scan linearly */ \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    /* entry not present: optionally drop zero values */ \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    /* nonew == 1 means silently ignore new nonzero locations */ \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  } while (0)
471 
/* Overwrite all stored values of global row `row` with v[].  v is expected in
   global column order: first the off-diagonal entries left of the owned column
   range, then the diagonal-block entries, then the remaining off-diagonal
   entries to the right. */
static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray                         = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag; /* convert to the local row index */
  /* garray[] holds B's global column indices, sorted, so l counts B's entries below the owned range */
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  /* left-of-diagonal part: the first l entries of B's row */
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
509 
/* Insert or add an m x n logically dense block of values (global indices im[],
   in[]) into the parallel matrix.  Locally owned rows are split on the fly
   between the diagonal block A (owned columns) and the off-diagonal block B
   (all other columns); rows owned by other processes are queued in the stash
   for communication during assembly.  Negative row/column indices are ignored. */
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A     = aij->A;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B                 = aij->B;
  Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are silently skipped */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the per-row search state used by the insertion macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
      ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
      ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* owned column: goes into the diagonal block with a local column index */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            /* after assembly B stores compacted local column indices; translate via colmap */
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              /* new off-diagonal location but B is locked (nonew set) */
              if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* before first assembly B uses global column indices directly */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      /* off-process row: queue the values in the stash for assembly-time communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}
618 
/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
624 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
625 {
626   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
627   Mat         A      = aij->A; /* diagonal part of the matrix */
628   Mat         B      = aij->B; /* off-diagonal part of the matrix */
629   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
630   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
631   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
632   PetscInt   *ailen = a->ilen, *aj = a->j;
633   PetscInt   *bilen = b->ilen, *bj = b->j;
634   PetscInt    am          = aij->A->rmap->n, j;
635   PetscInt    diag_so_far = 0, dnz;
636   PetscInt    offd_so_far = 0, onz;
637 
638   PetscFunctionBegin;
639   /* Iterate over all rows of the matrix */
640   for (j = 0; j < am; j++) {
641     dnz = onz = 0;
642     /*  Iterate over all non-zero columns of the current row */
643     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
644       /* If column is in the diagonal */
645       if (mat_j[col] >= cstart && mat_j[col] < cend) {
646         aj[diag_so_far++] = mat_j[col] - cstart;
647         dnz++;
648       } else { /* off-diagonal entries */
649         bj[offd_so_far++] = mat_j[col];
650         onz++;
651       }
652     }
653     ailen[j] = dnz;
654     bilen[j] = onz;
655   }
656   PetscFunctionReturn(PETSC_SUCCESS);
657 }
658 
/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
666 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
667 {
668   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
669   Mat          A    = aij->A; /* diagonal part of the matrix */
670   Mat          B    = aij->B; /* off-diagonal part of the matrix */
671   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
672   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
673   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
674   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
675   PetscInt    *ailen = a->ilen, *aj = a->j;
676   PetscInt    *bilen = b->ilen, *bj = b->j;
677   PetscInt     am          = aij->A->rmap->n, j;
678   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
679   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
680   PetscScalar *aa = a->a, *ba = b->a;
681 
682   PetscFunctionBegin;
683   /* Iterate over all rows of the matrix */
684   for (j = 0; j < am; j++) {
685     dnz_row = onz_row = 0;
686     rowstart_offd     = full_offd_i[j];
687     rowstart_diag     = full_diag_i[j];
688     /*  Iterate over all non-zero columns of the current row */
689     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
690       /* If column is in the diagonal */
691       if (mat_j[col] >= cstart && mat_j[col] < cend) {
692         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
693         aa[rowstart_diag + dnz_row] = mat_a[col];
694         dnz_row++;
695       } else { /* off-diagonal entries */
696         bj[rowstart_offd + onz_row] = mat_j[col];
697         ba[rowstart_offd + onz_row] = mat_a[col];
698         onz_row++;
699       }
700     }
701     ailen[j] = dnz_row;
702     bilen[j] = onz_row;
703   }
704   PetscFunctionReturn(PETSC_SUCCESS);
705 }
706 
/*
  MatGetValues_MPIAIJ - Fetches an m x n logically dense block of values, v[i*n + j] = mat(idxm[i], idxn[j]).

  Only locally owned rows may be requested; a row outside [rstart, rend) raises PETSC_ERR_SUP.
  Negative row or column indices are skipped, leaving the corresponding entries of v untouched.
  Local columns are read from the diagonal block aij->A; other columns are looked up in the
  compressed off-diagonal block aij->B via the colmap, yielding 0.0 when the column is not
  present in B's nonzero structure.
*/
static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row: skip, leaving this row of v untouched */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
    row = idxm[i] - rstart; /* local row index into both A and B blocks */
    for (j = 0; j < n; j++) {
      if (idxn[j] < 0) continue; /* negative column */
      PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
      if (idxn[j] >= cstart && idxn[j] < cend) {
        /* column owned locally: read from the diagonal block with a shifted column index */
        col = idxn[j] - cstart;
        PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
      } else {
        /* off-process column: translate global column to B's compressed column via colmap (built lazily) */
        if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
        PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
        col--; /* colmap stores index+1 so that 0 means "absent" */
#else
        col = aij->colmap[idxn[j]] - 1;
#endif
        /* column not in B's nonzero structure -> value is an (unstored) zero */
        if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
        else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
740 
741 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
742 {
743   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
744   PetscInt    nstash, reallocs;
745 
746   PetscFunctionBegin;
747   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
748 
749   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
750   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
751   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
752   PetscFunctionReturn(PETSC_SUCCESS);
753 }
754 
/*
  MatAssemblyEnd_MPIAIJ - Completes assembly: drains the stash of off-process entries,
  assembles both sequential blocks, handles (dis)assembly of the off-diagonal block's
  global/local column numbering, and updates the collective nonzero state.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  /* receive the off-process entries that other ranks stashed for our rows */
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break; /* no more incoming messages */

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    /* LAND of was_assembled: false on any rank means someone disassembled */
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  /* first final assembly: build colmap/garray and the Mvctx scatter for MatMult */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  /* the row-access scratch buffers (see MatGetRow) are invalidated by assembly */
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  /* cached diagonal is stale after new values were assembled */
  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
833 
834 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
835 {
836   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
837 
838   PetscFunctionBegin;
839   PetscCall(MatZeroEntries(l->A));
840   PetscCall(MatZeroEntries(l->B));
841   PetscFunctionReturn(PETSC_SUCCESS);
842 }
843 
/*
  MatZeroRows_MPIAIJ - Zeroes the listed (globally indexed, possibly off-process) rows,
  optionally placing `diag` on the diagonal and fixing the right-hand side b = diag*x
  for those rows.
*/
static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)A->data;
  PetscObjectState sA, sB;
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* remember the blocks' nonzero states to detect pattern changes at the end */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* congruent layouts: the diagonal entry of each zeroed row lives in the diagonal block */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    nnwA, nnwB; /* saved 'nonew' settings of each block, restored below */
    PetscBool   nnzA, nnzB; /* cached keepnonzeropattern flags (note: NOT nonzero counts) */

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0; /* temporarily allow new nonzero locations for the diagonal insertions */
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart; /* global row index */
      if (row >= A->cmap->N) continue;                 /* no diagonal position exists past the column range */
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  /* flush the diagonal insertions (collective) */
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate: bump the global state if any rank changed its pattern */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(PETSC_SUCCESS);
}
917 
/*
  MatZeroRowsColumns_MPIAIJ - Zeroes both the rows and columns given by the (globally indexed,
  possibly off-process) list `rows`, optionally placing `diag` on the diagonal and updating the
  right-hand side b -= a_ij * x_j for eliminated columns j.

  The global-to-local row communication is done with a PetscSF; column elimination in the
  off-diagonal block uses a 0/1 mask vector scattered into ghost space.
*/
static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n; /* local row count; reused later as a per-row nonzero count */
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1; /* -1 marks "not requested"; any reduce leaves it >= 0 */
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed; MPI_LOR marks the owned slots of requested rows
     (NOTE(review): relies on row indices being nonnegative so the OR result is >= 0 — confirm) */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off-diagonal part of matrix */
  /* build a 0/1 mask over ghost columns marking which (global) columns were eliminated anywhere */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    /* bring ghost values of x local so b can be corrected for eliminated off-process columns */
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off-diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    /* compressed-row storage: only rows with nonzeros are stored; ridx maps back to local rows */
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i]; /* number of nonzeros in this (compressed) row */
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) { /* this column was eliminated somewhere */
          if (b) bb[*ridx] -= *aa * xx[*aj]; /* move the known term to the rhs */
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1035 
1036 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1037 {
1038   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1039   PetscInt    nt;
1040   VecScatter  Mvctx = a->Mvctx;
1041 
1042   PetscFunctionBegin;
1043   PetscCall(VecGetLocalSize(xx, &nt));
1044   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
1045   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1046   PetscUseTypeMethod(a->A, mult, xx, yy);
1047   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1048   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1049   PetscFunctionReturn(PETSC_SUCCESS);
1050 }
1051 
1052 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1053 {
1054   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1055 
1056   PetscFunctionBegin;
1057   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1058   PetscFunctionReturn(PETSC_SUCCESS);
1059 }
1060 
1061 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1062 {
1063   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1064   VecScatter  Mvctx = a->Mvctx;
1065 
1066   PetscFunctionBegin;
1067   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1068   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1069   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1070   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1071   PetscFunctionReturn(PETSC_SUCCESS);
1072 }
1073 
1074 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1075 {
1076   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1077 
1078   PetscFunctionBegin;
1079   /* do nondiagonal part */
1080   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1081   /* do local part */
1082   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1083   /* add partial results together */
1084   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1085   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1086   PetscFunctionReturn(PETSC_SUCCESS);
1087 }
1088 
/*
  MatIsTranspose_MPIAIJ - Tests whether Bmat equals Amat^T within tol, setting *f collectively.

  Strategy: first a cheap collective test of the local diagonal blocks; only if that passes
  (and there is more than one rank) are the off-diagonal parts compared via MatCreateSubMatrices.
*/
static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
  Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme;
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); /* sequential: diagonal block is the whole matrix */

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  /* notme = all global indices outside this rank's ownership range [first, last)
     NOTE(review): the array is sized with N but the second loop runs to M — this
     presumes compatible (square-like) row/column sizes; confirm for M != N. */
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  /* extract the local-rows x non-local-columns part of A, and its mirror in B */
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1128 
1129 static PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f)
1130 {
1131   PetscFunctionBegin;
1132   PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f));
1133   PetscFunctionReturn(PETSC_SUCCESS);
1134 }
1135 
1136 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1137 {
1138   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1139 
1140   PetscFunctionBegin;
1141   /* do nondiagonal part */
1142   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1143   /* do local part */
1144   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1145   /* add partial results together */
1146   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1147   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1148   PetscFunctionReturn(PETSC_SUCCESS);
1149 }
1150 
1151 /*
1152   This only works correctly for square matrices where the subblock A->A is the
1153    diagonal block
1154 */
1155 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1156 {
1157   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1158 
1159   PetscFunctionBegin;
1160   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1161   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1162   PetscCall(MatGetDiagonal(a->A, v));
1163   PetscFunctionReturn(PETSC_SUCCESS);
1164 }
1165 
1166 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1167 {
1168   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1169 
1170   PetscFunctionBegin;
1171   PetscCall(MatScale(a->A, aa));
1172   PetscCall(MatScale(a->B, aa));
1173   PetscFunctionReturn(PETSC_SUCCESS);
1174 }
1175 
/*
  MatView_MPIAIJ_Binary - Writes the parallel matrix to a binary viewer in the standard
  PETSc AIJ file format: header (classid, M, N, total nz), per-row lengths, column indices,
  then values. Column indices of each row are emitted in ascending global order by
  interleaving the off-diagonal entries (global columns below the local range first,
  then the diagonal block, then the remaining off-diagonal entries).
*/
static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
  const PetscInt    *garray = aij->garray; /* maps B's compressed columns to global columns */
  const PetscScalar *aa, *ba;
  PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
  PetscInt64         nz, hnz; /* 64-bit so huge matrices don't overflow the sum */
  PetscInt          *rowlens;
  PetscInt          *colidxs;
  PetscScalar       *matvals;
  PetscMPIInt        rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) {
    /* clamp: the header field is only PetscInt wide */
    if (hnz > PETSC_MAX_INT) header[3] = PETSC_MAX_INT;
    else header[3] = (PetscInt)hnz;
  }
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths  */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < m; i++) {
    /* off-diagonal entries whose global column precedes the local column range */
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    /* diagonal-block entries, shifted back to global columns */
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
    /* remaining off-diagonal entries (global columns past the local range) */
    for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values, in the same interleaved order as the column indices */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < m; i++) {
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      matvals[cnt++] = ba[jb];
    }
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
    for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
  }
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1254 
1255 #include <petscdraw.h>
/*
  MatView_MPIAIJ_ASCIIorDraworSocket - Viewer dispatch for MPIAIJ matrices.

  Special ASCII formats (LOAD_BALANCE, INFO, INFO_DETAIL, FACTOR_INFO) and binary output
  are handled without gathering the matrix; every other case falls through to gathering
  the whole matrix onto rank 0 and viewing it there (required e.g. for draw viewers,
  whose graphics are synchronized across ranks).
*/
static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt       rank = aij->rank, size = aij->size;
  PetscBool         isdraw, iascii, isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max local nonzero counts across all ranks */
      PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < (PetscInt)size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank nonzero/memory/inode summary plus the Mvctx scatter */
      MatInfo   info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (iascii && size == 1) {
    /* NOTE(review): this branch looks unreachable — the first `if (iascii)` above
       already captures all ASCII viewers; confirm before relying on it */
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (isdraw) {
    /* a null draw is a no-op for every rank */
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow, iscol;

    /* rank 0 requests all rows/columns, every other rank requests none */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
    /*  The commented code uses MatCreateSubMatrices instead */
    /*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
    if (rank == 0) {
       PetscCall(PetscObjectReference((PetscObject)AA[0]));
       A    = AA[0];
       Av   = AA[0];
    }
    PetscCall(MatDestroySubMatrices(1,&AA));
*/
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqAIJ(Av, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1380 
1381 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1382 {
1383   PetscBool iascii, isdraw, issocket, isbinary;
1384 
1385   PetscFunctionBegin;
1386   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1387   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1388   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1389   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1390   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1391   PetscFunctionReturn(PETSC_SUCCESS);
1392 }
1393 
/*
   MatSOR_MPIAIJ - processor-local SOR/relaxation for an MPIAIJ matrix.

   Each sweep applies SOR only to the local diagonal block A; the coupling to
   off-process unknowns is folded into the right-hand side as bb1 = bb - B*x,
   where B is the off-diagonal block and x is the current (ghosted) iterate.
   A globally ordered parallel SOR is not implemented; unsupported flag
   combinations error out below.  Also implements the Eisenstat trick variant.
*/
static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec         bb1 = NULL;
  PetscBool   hasop;

  PetscFunctionBegin;
  /* SOR_APPLY_UPPER is delegated entirely to the diagonal block */
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* bb1 workspace is needed whenever an updated rhs bb - B*x will be formed;
     '~flag & SOR_ZERO_INITIAL_GUESS' tests that the zero-initial-guess bit is NOT set */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    /* with a zero initial guess B*x vanishes, so the first sweep uses bb directly */
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      /* gather ghost values of the current iterate into lvec */
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    /* backward local sweep from a zero initial guess produces the first half-iterate */
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    /* cache the matrix diagonal the first time it is needed */
    if (!mat->diag) {
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  /* propagate any zero-pivot/factorization error detected in the local solves */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1490 
/*
   MatPermute_MPIAIJ - forms B, the matrix A with rows permuted by rowp and
   columns permuted by colp.

   Outline: PetscSF reductions invert the row and column permutations so each
   process learns the destination global index of each of its rows and columns;
   the ghost columns of the off-diagonal block are translated the same way via a
   broadcast; the diagonal/off-diagonal nonzero counts per destination row are
   computed for preallocation and shipped to the owners of those rows; finally
   the entries are inserted with MatSetValues and the result assembled.
*/
static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL;
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  /* work: scratch sized for the larger of rows/columns; rdest/cdest: destination
     global indices of the local rows/columns after permutation */
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* dnnz/onnz: diagonal/off-diagonal nonzero counts for the rows this process
     holds now; tdnnz/tonnz: the same counts delivered to the owners of the
     destination rows, used for preallocation of Aperm */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  /* NOTE(review): parcolp is never assigned in this routine, so this destroy is
     currently dead code -- possibly a leftover from handling a parallel colp;
     confirm before removing */
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1596 
1597 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1598 {
1599   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1600 
1601   PetscFunctionBegin;
1602   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1603   if (ghosts) *ghosts = aij->garray;
1604   PetscFunctionReturn(PETSC_SUCCESS);
1605 }
1606 
1607 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1608 {
1609   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1610   Mat            A = mat->A, B = mat->B;
1611   PetscLogDouble isend[5], irecv[5];
1612 
1613   PetscFunctionBegin;
1614   info->block_size = 1.0;
1615   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1616 
1617   isend[0] = info->nz_used;
1618   isend[1] = info->nz_allocated;
1619   isend[2] = info->nz_unneeded;
1620   isend[3] = info->memory;
1621   isend[4] = info->mallocs;
1622 
1623   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1624 
1625   isend[0] += info->nz_used;
1626   isend[1] += info->nz_allocated;
1627   isend[2] += info->nz_unneeded;
1628   isend[3] += info->memory;
1629   isend[4] += info->mallocs;
1630   if (flag == MAT_LOCAL) {
1631     info->nz_used      = isend[0];
1632     info->nz_allocated = isend[1];
1633     info->nz_unneeded  = isend[2];
1634     info->memory       = isend[3];
1635     info->mallocs      = isend[4];
1636   } else if (flag == MAT_GLOBAL_MAX) {
1637     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1638 
1639     info->nz_used      = irecv[0];
1640     info->nz_allocated = irecv[1];
1641     info->nz_unneeded  = irecv[2];
1642     info->memory       = irecv[3];
1643     info->mallocs      = irecv[4];
1644   } else if (flag == MAT_GLOBAL_SUM) {
1645     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1646 
1647     info->nz_used      = irecv[0];
1648     info->nz_allocated = irecv[1];
1649     info->nz_unneeded  = irecv[2];
1650     info->memory       = irecv[3];
1651     info->mallocs      = irecv[4];
1652   }
1653   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1654   info->fill_ratio_needed = 0;
1655   info->factor_mallocs    = 0;
1656   PetscFunctionReturn(PETSC_SUCCESS);
1657 }
1658 
/*
   MatSetOption_MPIAIJ - sets an option on an MPIAIJ matrix, forwarding it to
   the sequential diagonal (A) and off-diagonal (B) blocks where appropriate.
*/
PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* options that simply propagate to both sequential blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  /* also recorded locally so MatSetValues interprets input orientation correctly */
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  /* suppresses communication of off-process entries during assembly */
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1712 
/*
   MatGetRow_MPIAIJ - returns one locally owned row of the parallel matrix,
   with global column indices, merging the diagonal (A) and off-diagonal (B)
   parts so the columns come out in increasing global order.

   The returned idx/v arrays point into per-matrix scratch space
   (mat->rowindices/mat->rowvalues) sized for the longest local row, so a
   second MatGetRow() before MatRestoreRow() is an error (guarded by
   getrowactive).  Requesting a row outside [rstart, rend) is an error.
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
  PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    max = 1, tmp;
    for (i = 0; i < matin->rmap->n; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  /* only request from the blocks the outputs the caller asked for */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  /* cmap translates the compressed B columns to global column indices */
  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column precedes the diagonal
         block; merged layout is [B-before-diag | A | B-after-diag] */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
        } else {
          /* imark was not computed above (v == NULL); find it here */
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1796 
1797 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1798 {
1799   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1800 
1801   PetscFunctionBegin;
1802   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1803   aij->getrowactive = PETSC_FALSE;
1804   PetscFunctionReturn(PETSC_SUCCESS);
1805 }
1806 
/*
   MatNorm_MPIAIJ - computes the Frobenius, one, or infinity norm of a
   parallel AIJ matrix by combining the contributions of the sequential
   diagonal (A) and off-diagonal (B) blocks and reducing over the communicator.
   The two norm is not supported.
*/
static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
  PetscInt         i, j, cstart = mat->cmap->rstart;
  PetscReal        sum = 0.0;
  const MatScalar *v, *amata, *bmata;

  PetscFunctionBegin;
  /* on one process the diagonal block is the whole matrix */
  if (aij->size == 1) {
    PetscCall(MatNorm(aij->A, type, norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both blocks, then global sum and square root */
      v = amata;
      for (i = 0; i < amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v = bmata;
      for (i = 0; i < bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      /* note: allocates two arrays of the full global column dimension N on
         every process, then reduces the per-column absolute sums */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = aij->garray;
      PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
      *norm = 0.0;
      v     = amata;
      jj    = amat->j;
      for (j = 0; j < amat->nz; j++) {
        /* A stores local column indices: shift by cstart to get global */
        tmp[cstart + *jj++] += PetscAbsScalar(*v);
        v++;
      }
      v  = bmata;
      jj = bmat->j;
      for (j = 0; j < bmat->nz; j++) {
        /* B stores compressed columns: garray maps them to global */
        tmp[garray[*jj++]] += PetscAbsScalar(*v);
        v++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* each local row spans row j of A plus row j of B */
      PetscReal ntemp = 0.0;
      for (j = 0; j < aij->A->rmap->n; j++) {
        v   = PetscSafePointerPlusOffset(amata, amat->i[j]);
        sum = 0.0;
        for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        v = PetscSafePointerPlusOffset(bmata, bmat->i[j]);
        for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1884 
/*
   MatTranspose_MPIAIJ - forms the transpose of an MPIAIJ matrix.

   The diagonal block is transposed locally (no communication) and written
   directly into the new diagonal block; the off-diagonal entries are sent to
   their owning processes via MatSetValues during assembly.  For
   MAT_INITIAL_MATRIX (or an in-place transpose, *matout == A) the result is
   first preallocated, using a PetscSF reduction to count the contributions
   that land off-process.
*/
static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
  Mat              B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  ma = A->rmap->n;
  na = A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt            *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode         *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    /* B is the transpose: column layout of A becomes row layout of B */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    /* reuse: the sparsity pattern of *matout must already match */
    B = *matout;
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ *)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];

  /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  /* translate compressed off-diagonal columns to global column indices */
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    /* insert row i of B-block as column 'row' of the transpose; MatSetValues
       routes off-process entries during assembly */
    ncol = bi[i + 1] - bi[i];
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    if (pbv) pbv += ncol;
    if (cols_tmp) cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place transpose: replace A's contents with B's and discard B's header */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1978 
/*
   MatDiagonalScale_MPIAIJ - computes mat = diag(ll)*mat*diag(rr) (either
   vector may be NULL).  The scatter of rr into the ghosted vector lvec is
   started first so that communication overlaps with the left scaling and the
   scaling of the diagonal block; the off-diagonal block is right-scaled with
   the ghosted values once the scatter completes.
*/
static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         a = aij->A, b = aij->B;
  PetscInt    s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    /* left scaling of the off-diagonal block needs only local values of ll */
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale  the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2008 
2009 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2010 {
2011   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2012 
2013   PetscFunctionBegin;
2014   PetscCall(MatSetUnfactored(a->A));
2015   PetscFunctionReturn(PETSC_SUCCESS);
2016 }
2017 
2018 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2019 {
2020   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2021   Mat         a, b, c, d;
2022   PetscBool   flg;
2023 
2024   PetscFunctionBegin;
2025   a = matA->A;
2026   b = matA->B;
2027   c = matB->A;
2028   d = matB->B;
2029 
2030   PetscCall(MatEqual(a, c, &flg));
2031   if (flg) PetscCall(MatEqual(b, d, &flg));
2032   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2033   PetscFunctionReturn(PETSC_SUCCESS);
2034 }
2035 
2036 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2037 {
2038   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2039   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2040 
2041   PetscFunctionBegin;
2042   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2043   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2044     /* because of the column compression in the off-processor part of the matrix a->B,
2045        the number of columns in a->B and b->B may be different, hence we cannot call
2046        the MatCopy() directly on the two parts. If need be, we can provide a more
2047        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2048        then copying the submatrices */
2049     PetscCall(MatCopy_Basic(A, B, str));
2050   } else {
2051     PetscCall(MatCopy(a->A, b->A, str));
2052     PetscCall(MatCopy(a->B, b->B, str));
2053   }
2054   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2055   PetscFunctionReturn(PETSC_SUCCESS);
2056 }
2057 
2058 /*
2059    Computes the number of nonzeros per row needed for preallocation when X and Y
2060    have different nonzero structure.
2061 */
2062 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2063 {
2064   PetscInt i, j, k, nzx, nzy;
2065 
2066   PetscFunctionBegin;
2067   /* Set the number of nonzeros in the new matrix */
2068   for (i = 0; i < m; i++) {
2069     const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]);
2070     nzx    = xi[i + 1] - xi[i];
2071     nzy    = yi[i + 1] - yi[i];
2072     nnz[i] = 0;
2073     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2074       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2075       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2076       nnz[i]++;
2077     }
2078     for (; k < nzy; k++) nnz[i]++;
2079   }
2080   PetscFunctionReturn(PETSC_SUCCESS);
2081 }
2082 
2083 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2084 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2085 {
2086   PetscInt    m = Y->rmap->N;
2087   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2088   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2089 
2090   PetscFunctionBegin;
2091   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2092   PetscFunctionReturn(PETSC_SUCCESS);
2093 }
2094 
/*
   MatAXPY_MPIAIJ - computes Y = a*X + Y.

   With SAME_NONZERO_PATTERN the operation is applied blockwise; with
   SUBSET_NONZERO_PATTERN the generic implementation is used; otherwise a new
   matrix with the union sparsity pattern is preallocated, filled, and merged
   back into Y's header so callers keep the same Mat object.
*/
static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscCall(MatAXPY(yy->A, a, xx->A, str));
    PetscCall(MatAXPY(yy->B, a, xx->B, str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o;

    /* rmap->N of the sequential blocks equals the number of local rows here */
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    /* layouts must be set before type/preallocation below */
    PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
    PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
    /* count union nonzeros per row for the diagonal and off-diagonal blocks */
    PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    /* replace Y's contents with B's; B's header is destroyed */
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2125 
2126 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2127 
2128 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2129 {
2130   PetscFunctionBegin;
2131   if (PetscDefined(USE_COMPLEX)) {
2132     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2133 
2134     PetscCall(MatConjugate_SeqAIJ(aij->A));
2135     PetscCall(MatConjugate_SeqAIJ(aij->B));
2136   }
2137   PetscFunctionReturn(PETSC_SUCCESS);
2138 }
2139 
2140 static PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2141 {
2142   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2143 
2144   PetscFunctionBegin;
2145   PetscCall(MatRealPart(a->A));
2146   PetscCall(MatRealPart(a->B));
2147   PetscFunctionReturn(PETSC_SUCCESS);
2148 }
2149 
2150 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2151 {
2152   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2153 
2154   PetscFunctionBegin;
2155   PetscCall(MatImaginaryPart(a->A));
2156   PetscCall(MatImaginaryPart(a->B));
2157   PetscFunctionReturn(PETSC_SUCCESS);
2158 }
2159 
2160 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2161 {
2162   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2163   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2164   PetscScalar       *va, *vv;
2165   Vec                vB, vA;
2166   const PetscScalar *vb;
2167 
2168   PetscFunctionBegin;
2169   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
2170   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2171 
2172   PetscCall(VecGetArrayWrite(vA, &va));
2173   if (idx) {
2174     for (i = 0; i < m; i++) {
2175       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2176     }
2177   }
2178 
2179   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
2180   PetscCall(PetscMalloc1(m, &idxb));
2181   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2182 
2183   PetscCall(VecGetArrayWrite(v, &vv));
2184   PetscCall(VecGetArrayRead(vB, &vb));
2185   for (i = 0; i < m; i++) {
2186     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2187       vv[i] = vb[i];
2188       if (idx) idx[i] = a->garray[idxb[i]];
2189     } else {
2190       vv[i] = va[i];
2191       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2192     }
2193   }
2194   PetscCall(VecRestoreArrayWrite(vA, &vv));
2195   PetscCall(VecRestoreArrayWrite(vA, &va));
2196   PetscCall(VecRestoreArrayRead(vB, &vb));
2197   PetscCall(PetscFree(idxb));
2198   PetscCall(VecDestroy(&vA));
2199   PetscCall(VecDestroy(&vB));
2200   PetscFunctionReturn(PETSC_SUCCESS);
2201 }
2202 
2203 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v)
2204 {
2205   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2206   PetscInt    m = A->rmap->n;
2207   Vec         vB, vA;
2208 
2209   PetscFunctionBegin;
2210   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
2211   PetscCall(MatGetRowSumAbs(a->A, vA));
2212   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
2213   PetscCall(MatGetRowSumAbs(a->B, vB));
2214   PetscCall(VecAXPY(vA, 1.0, vB));
2215   PetscCall(VecDestroy(&vB));
2216   PetscCall(VecCopy(vA, v));
2217   PetscCall(VecDestroy(&vA));
2218   PetscFunctionReturn(PETSC_SUCCESS);
2219 }
2220 
/* v[r] = min_j |A[r][j]| over each locally owned row, with idx[r] (optional) the global column
   of a minimizing entry. Columns that are not stored in the sparse pattern count as implicit
   0.0 entries, so the off-diagonal scan must find the first "hole" (unstored global column)
   of each row to report a column index for a zero minimum. */
static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* global column numbers of the compressed B columns */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* Everything is in the diagonal block: delegate directly, writing straight into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* This process owns no columns: every row minimum magnitude is the implicit 0.0 */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = 0.0;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j; /* advanced row by row in the value scan below, so bj[j] indexes the current row */
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zeros, seed with the first stored entry */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse: an implicit 0.0 exists, so the minimum magnitude starts at 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the diagonal-block column range */
        }
      }
    }

    /* Scan the stored values of this row, keeping the smallest magnitude */
    for (j = 0; j < ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Merge diagonal-block and off-diagonal results; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2336 
/* v[r] = min_j A[r][j] (real-part comparison) over each locally owned row, with idx[r]
   (optional) the global column of a minimizing entry. Unstored columns count as implicit 0.0,
   so the off-diagonal scan locates the first "hole" of each row to report an index for a zero
   minimum. Structure parallels MatGetRowMinAbs_MPIAIJ/MatGetRowMax_MPIAIJ. */
static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* global column numbers of the compressed B columns */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* Everything is in the diagonal block: delegate directly, writing straight into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* This process owns no columns: no stored entries, so the minimum is the identity PETSC_MAX_REAL */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j; /* advanced row by row in the value scan below, so bj[j] indexes the current row */
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zeros, seed with the first stored entry */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse: an implicit 0.0 exists, so the row minimum is 0.0 or lower */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the diagonal-block column range */
        }
      }
    }

    /* Scan the stored values of this row, keeping the smallest (by real part) */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Merge diagonal-block and off-diagonal results; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2452 
/* v[r] = max_j A[r][j] (real-part comparison) over each locally owned row, with idx[r]
   (optional) the global column of a maximizing entry. Unstored columns count as implicit 0.0,
   so the off-diagonal scan locates the first "hole" of each row to report an index for a zero
   maximum. Structure parallels MatGetRowMin_MPIAIJ. */
static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* global column numbers of the compressed B columns */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* Everything is in the diagonal block: delegate directly, writing straight into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* This process owns no columns: no stored entries, so the maximum is the identity PETSC_MIN_REAL */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* NOTE(review): the sibling MatGetRowMin_MPIAIJ uses PetscCalloc2 here; Malloc2 relies on
     every offdiagIdx entry being assigned in the loop below — confirm that holds on all paths */
  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j; /* advanced row by row in the value scan below, so bj[j] indexes the current row */
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zeros, seed with the first stored entry */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the diagonal-block column range */
        }
      }
    }

    /* Scan the stored values of this row, keeping the largest (by real part) */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Merge diagonal-block and off-diagonal results; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2568 
2569 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2570 {
2571   Mat *dummy;
2572 
2573   PetscFunctionBegin;
2574   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2575   *newmat = *dummy;
2576   PetscCall(PetscFree(dummy));
2577   PetscFunctionReturn(PETSC_SUCCESS);
2578 }
2579 
2580 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2581 {
2582   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2583 
2584   PetscFunctionBegin;
2585   PetscCall(MatInvertBlockDiagonal(a->A, values));
2586   A->factorerrortype = a->A->factorerrortype;
2587   PetscFunctionReturn(PETSC_SUCCESS);
2588 }
2589 
2590 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2591 {
2592   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2593 
2594   PetscFunctionBegin;
2595   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2596   PetscCall(MatSetRandom(aij->A, rctx));
2597   if (x->assembled) {
2598     PetscCall(MatSetRandom(aij->B, rctx));
2599   } else {
2600     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2601   }
2602   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2603   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2604   PetscFunctionReturn(PETSC_SUCCESS);
2605 }
2606 
2607 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2608 {
2609   PetscFunctionBegin;
2610   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2611   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2612   PetscFunctionReturn(PETSC_SUCCESS);
2613 }
2614 
2615 /*@
2616   MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2617 
2618   Not Collective
2619 
2620   Input Parameter:
2621 . A - the matrix
2622 
2623   Output Parameter:
2624 . nz - the number of nonzeros
2625 
2626   Level: advanced
2627 
2628 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2629 @*/
2630 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2631 {
2632   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2633   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2634   PetscBool   isaij;
2635 
2636   PetscFunctionBegin;
2637   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
2638   PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
2639   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2640   PetscFunctionReturn(PETSC_SUCCESS);
2641 }
2642 
2643 /*@
2644   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2645 
2646   Collective
2647 
2648   Input Parameters:
2649 + A  - the matrix
2650 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)
2651 
2652   Level: advanced
2653 
2654 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2655 @*/
2656 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
2657 {
2658   PetscFunctionBegin;
2659   PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
2660   PetscFunctionReturn(PETSC_SUCCESS);
2661 }
2662 
2663 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
2664 {
2665   PetscBool sc = PETSC_FALSE, flg;
2666 
2667   PetscFunctionBegin;
2668   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2669   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2670   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2671   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2672   PetscOptionsHeadEnd();
2673   PetscFunctionReturn(PETSC_SUCCESS);
2674 }
2675 
/* Y += a*I. Before delegating to the generic shift, make sure the diagonal block can hold
   at least one entry per row so insertions do not fail. */
static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
  } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
    PetscInt nonew = aij->nonew; /* save the user's new-nonzero setting: re-preallocating resets it */
    PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2692 
2693 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2694 {
2695   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2696 
2697   PetscFunctionBegin;
2698   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2699   PetscCall(MatMissingDiagonal(a->A, missing, d));
2700   if (d) {
2701     PetscInt rstart;
2702     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2703     *d += rstart;
2704   }
2705   PetscFunctionReturn(PETSC_SUCCESS);
2706 }
2707 
2708 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2709 {
2710   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2711 
2712   PetscFunctionBegin;
2713   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2714   PetscFunctionReturn(PETSC_SUCCESS);
2715 }
2716 
2717 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
2718 {
2719   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2720 
2721   PetscFunctionBegin;
2722   PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2723   PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2724   PetscFunctionReturn(PETSC_SUCCESS);
2725 }
2726 
/* Dispatch table of type-specific implementations for MATMPIAIJ. Entries are positional and
   must match the slot order of struct _MatOps exactly (the numeric comments mark slot indices);
   NULL slots mean the operation is unsupported or handled generically. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIAIJ,
                                       MatGetRowSumAbs_MPIAIJ};
2880 
2881 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2882 {
2883   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2884 
2885   PetscFunctionBegin;
2886   PetscCall(MatStoreValues(aij->A));
2887   PetscCall(MatStoreValues(aij->B));
2888   PetscFunctionReturn(PETSC_SUCCESS);
2889 }
2890 
2891 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2892 {
2893   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2894 
2895   PetscFunctionBegin;
2896   PetscCall(MatRetrieveValues(aij->A));
2897   PetscCall(MatRetrieveValues(aij->B));
2898   PetscFunctionReturn(PETSC_SUCCESS);
2899 }
2900 
/* Preallocate the MPIAIJ matrix: (re)create the sequential diagonal block b->A and
   off-diagonal block b->B and preallocate each with the given per-row nonzero counts.
   Any previously built communication data (colmap, garray, lvec, Mvctx) is discarded
   and will be rebuilt at the next assembly. */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* If the matrix was in hash-table (unpreallocated) insertion mode, restore the
     function table that was saved in b->cops before switching to preallocated mode */
  if (B->hash_active) {
    B->ops[0]      = b->cops;
    B->hash_active = PETSC_FALSE;
  }
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

  /* throw away any stale column map / ghost-column data from a previous assembly */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  /* Off-diagonal block: on a single process it gets zero columns, since all
     columns are then owned locally and belong to the diagonal block */
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));

  /* Diagonal block: local rows x local columns of the parallel matrix */
  PetscCall(MatDestroy(&b->A));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
  PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
  PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
  PetscCall(MatSetType(b->A, MATSEQAIJ));

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  /* matrix must be assembled (again) before use */
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2943 
/* Reset the preallocation state of the diagonal (b->A) and off-diagonal (b->B)
   blocks and discard cached communication data; the matrix is left preallocated
   but unassembled, so it must be filled and assembled again before use. */
static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

  /* throw away the column map, ghost-column array, ghost vector and scatter;
     they are rebuilt at the next assembly */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2969 
/* Create a new MPIAIJ matrix with the same parallel layout as matin, copying the
   nonzero structure and (depending on cpvalues) the numerical values. */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat         mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  mat->factortype = matin->factortype;
  mat->assembled  = matin->assembled;
  mat->insertmode = NOT_SET_VALUES;

  a->size        = oldmat->size;
  a->rank        = oldmat->rank;
  a->donotstash  = oldmat->donotstash;
  a->roworiented = oldmat->roworiented;
  /* row-access scratch state (see getrowactive) is per-matrix and not copied */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
  if (matin->hash_active) {
    /* matin is still in hash-based (unpreallocated) insertion mode; set the copy up the same way */
    PetscCall(MatSetUp(mat));
  } else {
    mat->preallocated = matin->preallocated;
    /* duplicate the global-to-local column map of the off-diagonal block, if it was built */
    if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
      PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
      PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
      PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
    } else a->colmap = NULL;
    /* duplicate the ghost (off-process) column index array, if it was built */
    if (oldmat->garray) {
      PetscInt len;
      len = oldmat->B->cmap->n;
      PetscCall(PetscMalloc1(len + 1, &a->garray));
      if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
    } else a->garray = NULL;

    /* It may happen MatDuplicate is called with a non-assembled matrix
      In fact, MatDuplicate only requires the matrix to be preallocated
      This may happen inside a DMCreateMatrix_Shell */
    if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
    if (oldmat->Mvctx) PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
    PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
    PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  }
  /* carry over composed functions (e.g. type-specific operations registered on matin) */
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
3028 
3029 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3030 {
3031   PetscBool isbinary, ishdf5;
3032 
3033   PetscFunctionBegin;
3034   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3035   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3036   /* force binary viewer to load .info file if it has not yet done so */
3037   PetscCall(PetscViewerSetUp(viewer));
3038   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3039   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3040   if (isbinary) {
3041     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3042   } else if (ishdf5) {
3043 #if defined(PETSC_HAVE_HDF5)
3044     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3045 #else
3046     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3047 #endif
3048   } else {
3049     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3050   }
3051   PetscFunctionReturn(PETSC_SUCCESS);
3052 }
3053 
/* Read an MPIAIJ matrix from a PETSc binary viewer: header, per-row nonzero counts,
   then column indices and values, distributed across the processes of the viewer.
   All reads are collective and must occur in this order on every process. */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header: classid, global rows, global columns, total nonzeros */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  /* a negative nonzero count marks a special on-disk format this loader cannot handle */
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  /* prefix-sum the row lengths into CSR row offsets: rowidxs[i] = start of row i */
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  /* nz == PETSC_MAX_INT flags that the total count was not recorded; skip the consistency check then */
  if (nz != PETSC_MAX_INT) {
    PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
    PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  }

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3106 
3107 /* Not scalable because of ISAllGather() unless getting all columns. */
3108 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3109 {
3110   IS          iscol_local;
3111   PetscBool   isstride;
3112   PetscMPIInt lisstride = 0, gisstride;
3113 
3114   PetscFunctionBegin;
3115   /* check if we are grabbing all columns*/
3116   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3117 
3118   if (isstride) {
3119     PetscInt start, len, mstart, mlen;
3120     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3121     PetscCall(ISGetLocalSize(iscol, &len));
3122     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3123     if (mstart == start && mlen - mstart == len) lisstride = 1;
3124   }
3125 
3126   PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3127   if (gisstride) {
3128     PetscInt N;
3129     PetscCall(MatGetSize(mat, NULL, &N));
3130     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3131     PetscCall(ISSetIdentity(iscol_local));
3132     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3133   } else {
3134     PetscInt cbs;
3135     PetscCall(ISGetBlockSize(iscol, &cbs));
3136     PetscCall(ISAllGather(iscol, &iscol_local));
3137     PetscCall(ISSetBlockSize(iscol_local, cbs));
3138   }
3139 
3140   *isseq = iscol_local;
3141   PetscFunctionReturn(PETSC_SUCCESS);
3142 }
3143 
/*
 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
 (see MatCreateSubMatrix_MPIAIJ_nonscalable)

 Input Parameters:
+   mat - matrix
.   isrow - parallel row index set; its local indices are a subset of local rows of `mat`,
           i.e., mat->rstart <= isrow[i] < mat->rend
-   iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
           i.e., mat->cstart <= iscol[i] < mat->cend

 Output Parameters:
+   isrow_d - sequential row index set for retrieving mat->A
.   iscol_d - sequential column index set for retrieving mat->A
.   iscol_o - sequential column index set for retrieving mat->B
-   garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
 */
static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
{
  Vec             x, cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray, *cmaparray;
  PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
  Mat             B    = a->B;
  Vec             lvec = a->lvec, lcmap;
  PetscInt        i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm        comm;
  VecScatter      Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x;
     entries left at -1 mark columns NOT selected by iscol */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0));
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices: isstart = global offset of this process's slice of iscol */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
    idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d: local column indices into mat->A, same block size as iscol */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i));
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d: isrow shifted to local row numbering, same block size as isrow */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices: entries that scattered in > -1 were selected by some process's iscol */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  for (i = 0; i < Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  /* caller takes ownership of cmap1 and must PetscFree() it */
  *garray = cmap1;

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3257 
/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat         M = NULL;
  MPI_Comm    comm;
  IS          iscol_d, isrow_d, iscol_o;
  Mat         Asub = NULL, Bsub = NULL;
  PetscInt    n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat (composed there by the
       MAT_INITIAL_MATRIX branch below on the first call) */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));

    /* Create submatrix M; this takes ownership of Asub and destroys Bsub */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ *)M->data;

    PetscCall(ISGetLocalSize(iscol_o, &BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt        i, j, *idx_new, *subgarray = asub->garray;
      PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));

      /* merge-walk the two sorted ghost-column maps, keeping only entries of
         iscol_o whose global column survived condensation */
      PetscCall(PetscMalloc1(n, &idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o, &idx));
      for (i = 0; i < n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o, &idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3350 
/* Extract the parallel submatrix mat[isrow, iscol]. Chooses among three paths:
   same row AND column distribution, same row distribution only, or the general
   (nonscalable) path based on ISAllGather(). The decision is collective. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* which path was taken at MAT_INITIAL_MATRIX is encoded in the objects composed on *newmat */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      /* all local indices fall inside this process's row ownership range */
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol, &i, &j));
      PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* both conditions must hold on every process for the fast paths to apply */
    PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
    PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
        PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol, &i));
        PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);

        PetscCall(ISSorted(iscol_local, &sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
        /* unsorted: fall through to the general path below (iscol_local is reused there) */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
  }

  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* stash the gathered column IS on the result so MAT_REUSE_MATRIX can find it */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3450 
3451 /*@C
3452   MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3453   and "off-diagonal" part of the matrix in CSR format.
3454 
3455   Collective
3456 
3457   Input Parameters:
3458 + comm   - MPI communicator
3459 . A      - "diagonal" portion of matrix
3460 . B      - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3461 - garray - global index of `B` columns
3462 
3463   Output Parameter:
3464 . mat - the matrix, with input `A` as its local diagonal matrix
3465 
3466   Level: advanced
3467 
3468   Notes:
3469   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3470 
3471   `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore.
3472 
3473 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3474 @*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat: sum of the local column counts of the A blocks */
  PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat */
  maij->A = A;

  /* Rewrite B's column indices in place from its local numbering to global
     column numbers using garray */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew wraps the (now globally
     numbered) i/j/value arrays of B without copying them */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* transfer ownership of the shared arrays from B to Bnew: B must not free them
     when it is destroyed just below */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3545 
3546 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3547 
/*
  MatCreateSubMatrix_MPIAIJ_SameRowDist - extracts the global submatrix mat[isrow,iscol] into a new
  parallel AIJ matrix whose row distribution follows isrow on each process.

  Input Parameters:
+ mat         - the MPIAIJ matrix to extract from
. isrow       - rows (in global numbering) this process keeps
. iscol       - columns of the submatrix; its local size determines the new column ownership
. iscol_local - sequential IS holding ALL selected columns on this process; assumed sorted
                (duplicates allowed) by the merge against garray below
. call        - MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
- newmat      - the resulting matrix (in/out for MAT_REUSE_MATRIX)

  Notes:
  On the initial call the sequential submatrix Msub, the compressed column IS iscol_sub and the
  column map iscmap are composed on *newmat ("SubMatrix", "SubIScol", "Subcmap") so that a reuse
  call can skip the symbolic work and only refill numerical values.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
{
  PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
  PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             M, Msub, B = a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray, *colsub, Ncols;
  PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  IS              iscol_sub, iscmap;
  const PetscInt *is_idx, *cmap;
  PetscBool       allcolumns = PETSC_FALSE;
  MPI_Comm        comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  if (call == MAT_REUSE_MATRIX) {
    /* retrieve the objects cached on *newmat by a previous MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
    PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub, &count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
    PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
    PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");

    /* refill the cached sequential submatrix with the current numerical values */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol, &n));
    PetscCall(ISGetSize(iscol, &Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local, &flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    /* every rank must agree before the all-columns fast path may be taken */
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      /* no column compression needed: reuse iscol_local and an identity column map */
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx, *cmap1, k;
      PetscCall(PetscMalloc1(Ncols, &idx));
      PetscCall(PetscMalloc1(Ncols, &cmap1));
      PetscCall(ISGetIndices(iscol_local, &is_idx));
      count = 0;
      k     = 0;
      /* single forward merge of the sorted iscol_local against the sorted garray: keep only
         columns that are present in this process's diagonal block or in garray */
      for (i = 0; i < Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn - 1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local, &is_idx));

      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
      PetscCall(ISGetBlockSize(iscol, &cbs));
      PetscCall(ISSetBlockSize(iscol_sub, cbs));

      /* iscmap maps local column indices of Msub back to positions within iscol_local */
      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub, &count));
  aij = (Mat_SeqAIJ *)(Msub)->data;
  ii  = aij->i;
  PetscCall(ISGetIndices(iscmap, &cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub, &m, NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank, size;
    PetscInt    csize;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol, &csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols / size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum of the local column counts gives this rank's column ownership range */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);

    /* next, compute all the lengths */
    /* per-row diagonal/off-diagonal counts, mapping Msub's local columns through cmap
       to the new global column numbering */
    jj = aij->j;
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow, &bs));
    PetscCall(ISGetBlockSize(iscol, &cbs));

    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M, &i, NULL));
    PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(count, &colsub));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));

  /* translate each row's local column indices through cmap and insert into M;
     jj and aa are walked forward across the CSR arrays of Msub */
  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
    jj += nz;
    aa += nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
  PetscCall(ISRestoreIndices(iscmap, &cmap));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    /* composing transfers ownership to *newmat; the local references are dropped here */
    *newmat = M;
    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3757 
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ
  on each process, and then the end result by concatenating the local matrices.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().

  This requires a sequential iscol with all indices.
*/
/*
  MatCreateSubMatrix_MPIAIJ_nonscalable - extracts mat[isrow,iscol] into a new parallel AIJ matrix.

  Input Parameters:
+ mat    - the MPIAIJ matrix to extract from
. isrow  - rows (global numbering) kept by this process
. iscol  - sequential IS with ALL selected column indices (hence "nonscalable")
. csize  - local column size of the result, or PETSC_DECIDE
. call   - MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
- newmat - the resulting matrix (in/out for MAT_REUSE_MATRIX)

  Notes:
  The intermediate sequential submatrix Mreuse is composed on the result ("SubMatrix") so a
  MAT_REUSE_MATRIX call can refill only the numerical values.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* all ranks must agree on the all-columns fast path */
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    /* reuse the sequential submatrix cached on *newmat by the initial call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum of the local column counts gives this rank's column ownership range */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    /* per-row diagonal/off-diagonal counts; jj walks Mreuse's CSR column array once */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  /* insert Mreuse's rows into M row by row; cwork/vwork point at the current row's
     column indices and values within the CSR arrays */
  for (i = 0; i < m; i++) {
    row   = rstart + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj    = PetscSafePointerPlusOffset(jj, nz);
    vwork = aa;
    aa    = PetscSafePointerPlusOffset(aa, nz);
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    /* composing transfers ownership to M; the local reference is dropped here */
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3891 
3892 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3893 {
3894   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3895   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
3896   const PetscInt *JJ;
3897   PetscBool       nooffprocentries;
3898   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3899 
3900   PetscFunctionBegin;
3901   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);
3902 
3903   PetscCall(PetscLayoutSetUp(B->rmap));
3904   PetscCall(PetscLayoutSetUp(B->cmap));
3905   m      = B->rmap->n;
3906   cstart = B->cmap->rstart;
3907   cend   = B->cmap->rend;
3908   rstart = B->rmap->rstart;
3909 
3910   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3911 
3912   if (PetscDefined(USE_DEBUG)) {
3913     for (i = 0; i < m; i++) {
3914       nnz = Ii[i + 1] - Ii[i];
3915       JJ  = PetscSafePointerPlusOffset(J, Ii[i]);
3916       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3917       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3918       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3919     }
3920   }
3921 
3922   for (i = 0; i < m; i++) {
3923     nnz     = Ii[i + 1] - Ii[i];
3924     JJ      = PetscSafePointerPlusOffset(J, Ii[i]);
3925     nnz_max = PetscMax(nnz_max, nnz);
3926     d       = 0;
3927     for (j = 0; j < nnz; j++) {
3928       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3929     }
3930     d_nnz[i] = d;
3931     o_nnz[i] = nnz - d;
3932   }
3933   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3934   PetscCall(PetscFree2(d_nnz, o_nnz));
3935 
3936   for (i = 0; i < m; i++) {
3937     ii = i + rstart;
3938     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i]), PetscSafePointerPlusOffset(v, Ii[i]), INSERT_VALUES));
3939   }
3940   nooffprocentries    = B->nooffprocentries;
3941   B->nooffprocentries = PETSC_TRUE;
3942   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3943   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3944   B->nooffprocentries = nooffprocentries;
3945 
3946   /* count number of entries below block diagonal */
3947   PetscCall(PetscFree(Aij->ld));
3948   PetscCall(PetscCalloc1(m, &ld));
3949   Aij->ld = ld;
3950   for (i = 0; i < m; i++) {
3951     nnz = Ii[i + 1] - Ii[i];
3952     j   = 0;
3953     while (j < nnz && J[j] < cstart) j++;
3954     ld[i] = j;
3955     if (J) J += nnz;
3956   }
3957 
3958   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3959   PetscFunctionReturn(PETSC_SUCCESS);
3960 }
3961 
3962 /*@
3963   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3964   (the default parallel PETSc format).
3965 
3966   Collective
3967 
3968   Input Parameters:
3969 + B - the matrix
3970 . i - the indices into j for the start of each local row (starts with zero)
3971 . j - the column indices for each local row (starts with zero)
3972 - v - optional values in the matrix
3973 
3974   Level: developer
3975 
3976   Notes:
3977   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
3978   thus you CANNOT change the matrix entries by changing the values of `v` after you have
3979   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3980 
3981   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
3982 
3983   A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.
3984 
3985   You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.
3986 
3987   If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
3988   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
3989 
3990   The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering, i.e. for the following matrix, the input data expected is
3992   as shown
3993 .vb
3994         1 0 0
3995         2 0 3     P0
3996        -------
3997         4 5 6     P1
3998 
3999      Process0 [P0] rows_owned=[0,1]
4000         i =  {0,1,3}  [size = nrow+1  = 2+1]
4001         j =  {0,0,2}  [size = 3]
4002         v =  {1,2,3}  [size = 3]
4003 
4004      Process1 [P1] rows_owned=[2]
4005         i =  {0,3}    [size = nrow+1  = 1+1]
4006         j =  {0,1,2}  [size = 3]
4007         v =  {4,5,6}  [size = 3]
4008 .ve
4009 
4010 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
4011           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4012 @*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* dispatch to the implementation composed on B (if any); "Try" means it is a no-op for types without one */
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4019 
4020 /*@C
4021   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
4022   (the default parallel PETSc format).  For good matrix assembly performance
4023   the user should preallocate the matrix storage by setting the parameters
4024   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4025 
4026   Collective
4027 
4028   Input Parameters:
4029 + B     - the matrix
4030 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4031            (same value is used for all local rows)
4032 . d_nnz - array containing the number of nonzeros in the various rows of the
4033            DIAGONAL portion of the local submatrix (possibly different for each row)
4034            or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4035            The size of this array is equal to the number of local rows, i.e 'm'.
4036            For matrices that will be factored, you must leave room for (and set)
4037            the diagonal entry even if it is zero.
4038 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4039            submatrix (same value is used for all local rows).
4040 - o_nnz - array containing the number of nonzeros in the various rows of the
4041            OFF-DIAGONAL portion of the local submatrix (possibly different for
4042            each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4043            structure. The size of this array is equal to the number
4044            of local rows, i.e 'm'.
4045 
4046   Example Usage:
4047   Consider the following 8x8 matrix with 34 non-zero values, that is
4048   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4049   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4050   as follows
4051 
4052 .vb
4053             1  2  0  |  0  3  0  |  0  4
4054     Proc0   0  5  6  |  7  0  0  |  8  0
4055             9  0 10  | 11  0  0  | 12  0
4056     -------------------------------------
4057            13  0 14  | 15 16 17  |  0  0
4058     Proc1   0 18  0  | 19 20 21  |  0  0
4059             0  0  0  | 22 23  0  | 24  0
4060     -------------------------------------
4061     Proc2  25 26 27  |  0  0 28  | 29  0
4062            30  0  0  | 31 32 33  |  0 34
4063 .ve
4064 
4065   This can be represented as a collection of submatrices as
4066 .vb
4067       A B C
4068       D E F
4069       G H I
4070 .ve
4071 
4072   Where the submatrices A,B,C are owned by proc0, D,E,F are
4073   owned by proc1, G,H,I are owned by proc2.
4074 
4075   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4076   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4077   The 'M','N' parameters are 8,8, and have the same values on all procs.
4078 
4079   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4080   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4081   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4082   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4083   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.
4085 
4086   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4087   allocated for every row of the local diagonal submatrix, and `o_nz`
4088   storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
4090   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4091   In this case, the values of `d_nz`, `o_nz` are
4092 .vb
4093      proc0  dnz = 2, o_nz = 2
4094      proc1  dnz = 3, o_nz = 2
4095      proc2  dnz = 1, o_nz = 4
4096 .ve
4097   We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
4098   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2. i.e. we are using 12+15+10=37 storage locations to store
4100   34 values.
4101 
4102   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4103   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4104   In the above case the values for `d_nnz`, `o_nnz` are
4105 .vb
4106      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4107      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4108      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4109 .ve
4110   Here the space allocated is sum of all the above values i.e 34, and
4111   hence pre-allocation is perfect.
4112 
4113   Level: intermediate
4114 
4115   Notes:
4116   If the *_nnz parameter is given then the *_nz parameter is ignored
4117 
4118   The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4119   storage.  The stored row and column indices begin with zero.
4120   See [Sparse Matrices](sec_matsparse) for details.
4121 
4122   The parallel matrix is partitioned such that the first m0 rows belong to
4123   process 0, the next m1 rows belong to process 1, the next m2 rows belong
4124   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4125 
4126   The DIAGONAL portion of the local submatrix of a processor can be defined
4127   as the submatrix which is obtained by extraction the part corresponding to
4128   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4129   first row that belongs to the processor, r2 is the last row belonging to
4130   the this processor, and c1-c2 is range of indices of the local part of a
4131   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4132   common case of a square matrix, the row and column ranges are the same and
4133   the DIAGONAL part is also square. The remaining portion of the local
4134   submatrix (mxN) constitute the OFF-DIAGONAL portion.
4135 
4136   If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4137 
4138   You can call `MatGetInfo()` to get information on how effective the preallocation was;
4139   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4140   You can also run with the option `-info` and look for messages with the string
4141   malloc in them to see if additional memory allocation was needed.
4142 
4143 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4144           `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4145 @*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* dispatch to the implementation composed on B (if any); "Try" means it is a no-op for types without one */
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4154 
4155 /*@
4156   MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard
4157   CSR format for the local rows.
4158 
4159   Collective
4160 
4161   Input Parameters:
4162 + comm - MPI communicator
4163 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
4164 . n    - This value should be the same as the local size used in creating the
4165        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4166        calculated if N is given) For square matrices n is almost always m.
4167 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4168 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4169 . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4170 . j    - column indices
4171 - a    - optional matrix values
4172 
4173   Output Parameter:
4174 . mat - the matrix
4175 
4176   Level: intermediate
4177 
4178   Notes:
4179   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4180   thus you CANNOT change the matrix entries by changing the values of a[] after you have
4181   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4182 
4183   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
4184 
4185   Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArrays()`
4186 
4187   If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
4188   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
4189 
4190   The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering, i.e. for the following matrix, the input data expected is
4192   as shown
4193 .vb
4194         1 0 0
4195         2 0 3     P0
4196        -------
4197         4 5 6     P1
4198 
4199      Process0 [P0] rows_owned=[0,1]
4200         i =  {0,1,3}  [size = nrow+1  = 2+1]
4201         j =  {0,0,2}  [size = 3]
4202         v =  {1,2,3}  [size = 3]
4203 
4204      Process1 [P1] rows_owned=[2]
4205         i =  {0,3}    [size = nrow+1  = 1+1]
4206         j =  {0,1,2}  [size = 3]
4207         v =  {4,5,6}  [size = 3]
4208 .ve
4209 
.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4211           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4212 @*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  /* validate the CSR row pointers before creating anything: i[0] == 0 and an explicit local row count */
  PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  /* the CSR arrays are copied; the caller retains ownership of i, j and a */
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4225 
4226 /*@
4227   MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard
4228   CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed
4229   from `MatCreateMPIAIJWithArrays()`
4230 
4231   Deprecated: Use `MatUpdateMPIAIJWithArray()`
4232 
4233   Collective
4234 
4235   Input Parameters:
4236 + mat - the matrix
4237 . m   - number of local rows (Cannot be `PETSC_DECIDE`)
4238 . n   - This value should be the same as the local size used in creating the
4239        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4240        calculated if N is given) For square matrices n is almost always m.
4241 . M   - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4242 . N   - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4243 . Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4244 . J   - column indices
4245 - v   - matrix values
4246 
4247   Level: deprecated
4248 
4249 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4250           `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4251 @*/
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        nnz, i;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
  PetscScalar    *ad, *ao;       /* value arrays of the diagonal (A) and off-diagonal (B) blocks */
  PetscInt        ldi, Iii, md;
  const PetscInt *Adi = Ad->i;   /* row pointer of the diagonal block */
  PetscInt       *ld  = Aij->ld; /* ld[i]: number of off-diagonal entries of row i that lie left of the diagonal block */

  PetscFunctionBegin;
  /* only the numerical values may change; the CSR structure must match the arrays the matrix was created with */
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
  PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));

  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      /* the split copies below rely on the columns within each row being sorted and unique */
      for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) {
        PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i);
        PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i);
      }
    }
    nnz = Ii[i + 1] - Ii[i];   /* total entries in row i */
    Iii = Ii[i];               /* offset of row i within v */
    ldi = ld[i];               /* off-diagonal entries preceding the diagonal block */
    md  = Adi[i + 1] - Adi[i]; /* entries belonging to the diagonal block */
    /* row i of v is laid out as [off-diag left | diagonal block | off-diag right]; scatter the three pieces */
    PetscCall(PetscArraycpy(ao, v + Iii, ldi));
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
    ad += md;
    ao += nnz - md;
  }
  /* all values were written directly into the local blocks, so assembly has no off-process entries to communicate */
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(PETSC_SUCCESS);
}
4301 
4302 /*@
4303   MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values
4304 
4305   Collective
4306 
4307   Input Parameters:
4308 + mat - the matrix
4309 - v   - matrix values, stored by row
4310 
4311   Level: intermediate
4312 
4313   Notes:
4314   The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4315 
4316   The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly
4317 
4318 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4319           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4320 @*/
PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
{
  PetscInt        nnz, i, m;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
  Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
  PetscScalar    *ad, *ao;                   /* value arrays of the diagonal (A) and off-diagonal (B) blocks */
  const PetscInt *Adi = Ad->i, *Adj = Ao->i; /* row pointers of the two blocks (Adj indexes B, despite the name) */
  PetscInt        ldi, Iii, md;
  PetscInt       *ld = Aij->ld;              /* ld[i]: off-diagonal entries of row i left of the diagonal block */

  PetscFunctionBegin;
  m = mat->rmap->n;

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
  Iii = 0; /* running offset of the current row within v */
  for (i = 0; i < m; i++) {
    /* the row length is reconstructed from the two blocks' row pointers, so no user CSR arrays are needed */
    nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
    ldi = ld[i];
    md  = Adi[i + 1] - Adi[i];
    /* row i of v is laid out as [off-diag left | diagonal block | off-diag right]; scatter the three pieces */
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    ad += md;
    if (ao) { /* ao may be NULL, e.g. when the off-diagonal block holds no entries */
      PetscCall(PetscArraycpy(ao, v + Iii, ldi));
      PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
      ao += nnz - md;
    }
    Iii += nnz;
  }
  /* all values were written directly into the local blocks, so assembly has no off-process entries to communicate */
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(PETSC_SUCCESS);
}
4364 
4365 /*@C
4366   MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4367   (the default parallel PETSc format).  For good matrix assembly performance
4368   the user should preallocate the matrix storage by setting the parameters
4369   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4370 
4371   Collective
4372 
4373   Input Parameters:
4374 + comm  - MPI communicator
4375 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4376            This value should be the same as the local size used in creating the
4377            y vector for the matrix-vector product y = Ax.
4378 . n     - This value should be the same as the local size used in creating the
4379        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4380        calculated if N is given) For square matrices n is almost always m.
4381 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4382 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4383 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4384            (same value is used for all local rows)
4385 . d_nnz - array containing the number of nonzeros in the various rows of the
4386            DIAGONAL portion of the local submatrix (possibly different for each row)
4387            or `NULL`, if `d_nz` is used to specify the nonzero structure.
4388            The size of this array is equal to the number of local rows, i.e 'm'.
4389 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4390            submatrix (same value is used for all local rows).
4391 - o_nnz - array containing the number of nonzeros in the various rows of the
4392            OFF-DIAGONAL portion of the local submatrix (possibly different for
4393            each row) or `NULL`, if `o_nz` is used to specify the nonzero
4394            structure. The size of this array is equal to the number
4395            of local rows, i.e 'm'.
4396 
4397   Output Parameter:
4398 . A - the matrix
4399 
4400   Options Database Keys:
4401 + -mat_no_inode                     - Do not use inodes
4402 . -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
4403 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4404         See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
4405         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4406 
4407   Level: intermediate
4408 
4409   Notes:
4410   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4411   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4412   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4413 
4414   If the *_nnz parameter is given then the *_nz parameter is ignored
4415 
4416   The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4417   processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4418   storage requirements for this matrix.
4419 
4420   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one
4421   processor than it must be used on all processors that share the object for
4422   that argument.
4423 
4424   The user MUST specify either the local or global matrix dimensions
4425   (possibly both).
4426 
4427   The parallel matrix is partitioned across processors such that the
4428   first m0 rows belong to process 0, the next m1 rows belong to
4429   process 1, the next m2 rows belong to process 2 etc.. where
4430   m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4431   values corresponding to [m x N] submatrix.
4432 
4433   The columns are logically partitioned with the n0 columns belonging
4434   to 0th partition, the next n1 columns belonging to the next
4435   partition etc.. where n0,n1,n2... are the input parameter 'n'.
4436 
4437   The DIAGONAL portion of the local submatrix on any given processor
4438   is the submatrix corresponding to the rows and columns m,n
4439   corresponding to the given processor. i.e diagonal matrix on
4440   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4441   etc. The remaining portion of the local submatrix [m x (N-n)]
4442   constitute the OFF-DIAGONAL portion. The example below better
4443   illustrates this concept.
4444 
4445   For a square global matrix we define each processor's diagonal portion
4446   to be its local rows and the corresponding columns (a square submatrix);
4447   each processor's off-diagonal portion encompasses the remainder of the
4448   local matrix (a rectangular submatrix).
4449 
4450   If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
4451 
4452   When calling this routine with a single process communicator, a matrix of
4453   type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4454   type of communicator, use the construction mechanism
4455 .vb
4456   MatCreate(..., &A);
4457   MatSetType(A, MATMPIAIJ);
4458   MatSetSizes(A, m, n, M, N);
4459   MatMPIAIJSetPreallocation(A, ...);
4460 .ve
4461 
4462   By default, this format uses inodes (identical nodes) when possible.
4463   We search for consecutive rows with the same nonzero structure, thereby
4464   reusing matrix information to achieve increased efficiency.
4465 
4466   Example Usage:
4467   Consider the following 8x8 matrix with 34 non-zero values, that is
4468   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4469   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4470   as follows
4471 
4472 .vb
4473             1  2  0  |  0  3  0  |  0  4
4474     Proc0   0  5  6  |  7  0  0  |  8  0
4475             9  0 10  | 11  0  0  | 12  0
4476     -------------------------------------
4477            13  0 14  | 15 16 17  |  0  0
4478     Proc1   0 18  0  | 19 20 21  |  0  0
4479             0  0  0  | 22 23  0  | 24  0
4480     -------------------------------------
4481     Proc2  25 26 27  |  0  0 28  | 29  0
4482            30  0  0  | 31 32 33  |  0 34
4483 .ve
4484 
4485   This can be represented as a collection of submatrices as
4486 
4487 .vb
4488       A B C
4489       D E F
4490       G H I
4491 .ve
4492 
4493   Where the submatrices A,B,C are owned by proc0, D,E,F are
4494   owned by proc1, G,H,I are owned by proc2.
4495 
4496   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4497   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4498   The 'M','N' parameters are 8,8, and have the same values on all procs.
4499 
4500   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4501   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4502   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4503   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4504   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another SeqAIJ matrix.
4506 
4507   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4508   allocated for every row of the local diagonal submatrix, and `o_nz`
4509   storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
4511   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4512   In this case, the values of `d_nz`,`o_nz` are
4513 .vb
4514      proc0  dnz = 2, o_nz = 2
4515      proc1  dnz = 3, o_nz = 2
4516      proc2  dnz = 1, o_nz = 4
4517 .ve
4518   We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
4519   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2, i.e. we are using 12+15+10=37 storage locations to store
4521   34 values.
4522 
4523   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4524   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4525   In the above case the values for d_nnz,o_nnz are
4526 .vb
4527      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4528      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4529      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4530 .ve
4531   Here the space allocated is sum of all the above values i.e 34, and
4532   hence pre-allocation is perfect.
4533 
4534 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4535           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
4536 @*/
4537 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4538 {
4539   PetscMPIInt size;
4540 
4541   PetscFunctionBegin;
4542   PetscCall(MatCreate(comm, A));
4543   PetscCall(MatSetSizes(*A, m, n, M, N));
4544   PetscCallMPI(MPI_Comm_size(comm, &size));
4545   if (size > 1) {
4546     PetscCall(MatSetType(*A, MATMPIAIJ));
4547     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4548   } else {
4549     PetscCall(MatSetType(*A, MATSEQAIJ));
4550     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4551   }
4552   PetscFunctionReturn(PETSC_SUCCESS);
4553 }
4554 
4555 /*MC
4556     MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix
4557 
4558     Synopsis:
4559     MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4560 
4561     Not Collective
4562 
4563     Input Parameter:
4564 .   A - the `MATMPIAIJ` matrix
4565 
4566     Output Parameters:
4567 +   Ad - the diagonal portion of the matrix
4568 .   Ao - the off-diagonal portion of the matrix
4569 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4570 -   ierr - error code
4571 
4572      Level: advanced
4573 
4574     Note:
4575     Use  `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4576 
4577 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
4578 M*/
4579 
4580 /*MC
4581     MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4582 
4583     Synopsis:
4584     MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4585 
4586     Not Collective
4587 
4588     Input Parameters:
4589 +   A - the `MATMPIAIJ` matrix
4590 .   Ad - the diagonal portion of the matrix
4591 .   Ao - the off-diagonal portion of the matrix
4592 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4593 -   ierr - error code
4594 
4595      Level: advanced
4596 
4597 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
4598 M*/
4599 
4600 /*@C
4601   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4602 
4603   Not Collective
4604 
4605   Input Parameter:
4606 . A - The `MATMPIAIJ` matrix
4607 
4608   Output Parameters:
4609 + Ad     - The local diagonal block as a `MATSEQAIJ` matrix
4610 . Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
4611 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4612 
4613   Level: intermediate
4614 
4615   Note:
4616   The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
4617   in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns are `Ao` are in [0, Nco), where Nco is
4618   the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
4619   local column numbers to global column numbers in the original matrix.
4620 
4621   Fortran Notes:
4622   `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()`
4623 
4624 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
4625 @*/
4626 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4627 {
4628   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4629   PetscBool   flg;
4630 
4631   PetscFunctionBegin;
4632   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4633   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4634   if (Ad) *Ad = a->A;
4635   if (Ao) *Ao = a->B;
4636   if (colmap) *colmap = a->garray;
4637   PetscFunctionReturn(PETSC_SUCCESS);
4638 }
4639 
/* Stacks each process's sequential matrix as consecutive block rows of one parallel matrix.
   scall == MAT_INITIAL_MATRIX builds the parallel matrix (symbolic phase); otherwise only the values are refilled. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  PetscInt     m, N, i, rstart, nnz, Ii;
  PetscInt    *indx;
  PetscScalar *values;
  MatType      rootType;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz, *onz, sum, bs, cbs;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    /* first global row owned by this process = sum of the local row counts of the lower ranks */
    PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
    rstart -= m;

    /* count diagonal/off-diagonal entries per row so the output can be preallocated exactly */
    MatPreallocateBegin(comm, m, n, dnz, onz);
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
      PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
    }

    PetscCall(MatCreate(comm, outmat));
    PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
    PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
    PetscCall(MatGetRootType_Private(inmat, &rootType));
    PetscCall(MatSetType(*outmat, rootType));
    /* both preallocations are set; the call matching the actual type takes effect, the other is a no-op */
    PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
    MatPreallocateEnd(dnz, onz);
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase: copy the local rows into the parallel matrix at this process's row offset */
  PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
    Ii = i + rstart;
    PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
  }
  PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4691 
4692 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4693 {
4694   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4695 
4696   PetscFunctionBegin;
4697   if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
4698   PetscCall(PetscFree(merge->id_r));
4699   PetscCall(PetscFree(merge->len_s));
4700   PetscCall(PetscFree(merge->len_r));
4701   PetscCall(PetscFree(merge->bi));
4702   PetscCall(PetscFree(merge->bj));
4703   PetscCall(PetscFree(merge->buf_ri[0]));
4704   PetscCall(PetscFree(merge->buf_ri));
4705   PetscCall(PetscFree(merge->buf_rj[0]));
4706   PetscCall(PetscFree(merge->buf_rj));
4707   PetscCall(PetscFree(merge->coi));
4708   PetscCall(PetscFree(merge->coj));
4709   PetscCall(PetscFree(merge->owners_co));
4710   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4711   PetscCall(PetscFree(merge));
4712   PetscFunctionReturn(PETSC_SUCCESS);
4713 }
4714 
4715 #include <../src/mat/utils/freespace.h>
4716 #include <petscbt.h>
4717 
/* Numeric phase of merging the per-process sequential matrices into mpimat: exchange the value
   arrays according to the communication pattern computed by MatCreateMPIAIJSumSeqAIJSymbolic(),
   then sum local and received contributions row by row and insert them into mpimat. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
{
  MPI_Comm             comm;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, taga, *len_s;
  PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj;
  PetscInt             proc, m;
  PetscInt           **buf_ri, **buf_rj;
  PetscInt             k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
  PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
  MPI_Request         *s_waits, *r_waits;
  MPI_Status          *status;
  const MatScalar     *aa, *a_a;
  MatScalar          **abuf_r, *ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));

  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* retrieve the merge data (communication pattern and merged ij-structure) stashed on mpimat */
  PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container, (void **)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
  aa = a_a;

  bi     = merge->bi;     /* row pointer of the merged local rows */
  bj     = merge->bj;     /* column indices of the merged local rows */
  buf_ri = merge->buf_ri; /* received i-structures, one per incoming message */
  buf_rj = merge->buf_rj; /* received j-structures */

  PetscCall(PetscMalloc1(size, &status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
  PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));

  PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    /* values for all rows owned by [proc] are contiguous in aa starting at row owners[proc] */
    i = owners[proc];
    PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  PetscCall(PetscMalloc1(N, &ba_i)); /* scratch accumulator for one merged row (at most N entries) */
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i = 0; i < m; i++) {
    arow = owners[rank] + i; /* global row number of local row i */
    bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
    bnzi = bi[i + 1] - bi[i];
    PetscCall(PetscArrayzero(ba_i, bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow + 1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* single forward scan: bj_i contains aj's columns and both lists are sorted (built in the symbolic phase) */
    for (j = 0; nextaj < anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k] + 1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j = 0; nextaj < anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++;
        nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
  PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));

  /* abuf_r[0] holds the contiguous received values; free it before the pointer array */
  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4836 
4837 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4838 {
4839   Mat                  B_mpi;
4840   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4841   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4842   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4843   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4844   PetscInt             len, proc, *dnz, *onz, bs, cbs;
4845   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4846   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4847   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4848   MPI_Status          *status;
4849   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4850   PetscBT              lnkbt;
4851   Mat_Merge_SeqsToMPI *merge;
4852   PetscContainer       container;
4853 
4854   PetscFunctionBegin;
4855   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4856 
4857   /* make sure it is a PETSc comm */
4858   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4859   PetscCallMPI(MPI_Comm_size(comm, &size));
4860   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4861 
4862   PetscCall(PetscNew(&merge));
4863   PetscCall(PetscMalloc1(size, &status));
4864 
4865   /* determine row ownership */
4866   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4867   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4868   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4869   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4870   PetscCall(PetscLayoutSetUp(merge->rowmap));
4871   PetscCall(PetscMalloc1(size, &len_si));
4872   PetscCall(PetscMalloc1(size, &merge->len_s));
4873 
4874   m      = merge->rowmap->n;
4875   owners = merge->rowmap->range;
4876 
4877   /* determine the number of messages to send, their lengths */
4878   len_s = merge->len_s;
4879 
4880   len          = 0; /* length of buf_si[] */
4881   merge->nsend = 0;
4882   for (proc = 0; proc < size; proc++) {
4883     len_si[proc] = 0;
4884     if (proc == rank) {
4885       len_s[proc] = 0;
4886     } else {
4887       len_si[proc] = owners[proc + 1] - owners[proc] + 1;
4888       len_s[proc]  = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4889     }
4890     if (len_s[proc]) {
4891       merge->nsend++;
4892       nrows = 0;
4893       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4894         if (ai[i + 1] > ai[i]) nrows++;
4895       }
4896       len_si[proc] = 2 * (nrows + 1);
4897       len += len_si[proc];
4898     }
4899   }
4900 
4901   /* determine the number and length of messages to receive for ij-structure */
4902   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4903   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4904 
4905   /* post the Irecv of j-structure */
4906   PetscCall(PetscCommGetNewTag(comm, &tagj));
4907   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4908 
4909   /* post the Isend of j-structure */
4910   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4911 
4912   for (proc = 0, k = 0; proc < size; proc++) {
4913     if (!len_s[proc]) continue;
4914     i = owners[proc];
4915     PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4916     k++;
4917   }
4918 
4919   /* receives and sends of j-structure are complete */
4920   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4921   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4922 
4923   /* send and recv i-structure */
4924   PetscCall(PetscCommGetNewTag(comm, &tagi));
4925   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4926 
4927   PetscCall(PetscMalloc1(len + 1, &buf_s));
4928   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4929   for (proc = 0, k = 0; proc < size; proc++) {
4930     if (!len_s[proc]) continue;
4931     /* form outgoing message for i-structure:
4932          buf_si[0]:                 nrows to be sent
4933                [1:nrows]:           row index (global)
4934                [nrows+1:2*nrows+1]: i-structure index
4935     */
4936     nrows       = len_si[proc] / 2 - 1;
4937     buf_si_i    = buf_si + nrows + 1;
4938     buf_si[0]   = nrows;
4939     buf_si_i[0] = 0;
4940     nrows       = 0;
4941     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4942       anzi = ai[i + 1] - ai[i];
4943       if (anzi) {
4944         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4945         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4946         nrows++;
4947       }
4948     }
4949     PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4950     k++;
4951     buf_si += len_si[proc];
4952   }
4953 
4954   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4955   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4956 
4957   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4958   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4959 
4960   PetscCall(PetscFree(len_si));
4961   PetscCall(PetscFree(len_ri));
4962   PetscCall(PetscFree(rj_waits));
4963   PetscCall(PetscFree2(si_waits, sj_waits));
4964   PetscCall(PetscFree(ri_waits));
4965   PetscCall(PetscFree(buf_s));
4966   PetscCall(PetscFree(status));
4967 
4968   /* compute a local seq matrix in each processor */
4969   /* allocate bi array and free space for accumulating nonzero column info */
4970   PetscCall(PetscMalloc1(m + 1, &bi));
4971   bi[0] = 0;
4972 
4973   /* create and initialize a linked list */
4974   nlnk = N + 1;
4975   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
4976 
4977   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4978   len = ai[owners[rank + 1]] - ai[owners[rank]];
4979   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
4980 
4981   current_space = free_space;
4982 
4983   /* determine symbolic info for each local row */
4984   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4985 
4986   for (k = 0; k < merge->nrecv; k++) {
4987     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4988     nrows       = *buf_ri_k[k];
4989     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4990     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4991   }
4992 
4993   MatPreallocateBegin(comm, m, n, dnz, onz);
4994   len = 0;
4995   for (i = 0; i < m; i++) {
4996     bnzi = 0;
4997     /* add local non-zero cols of this proc's seqmat into lnk */
4998     arow = owners[rank] + i;
4999     anzi = ai[arow + 1] - ai[arow];
5000     aj   = a->j + ai[arow];
5001     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5002     bnzi += nlnk;
5003     /* add received col data into lnk */
5004     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
5005       if (i == *nextrow[k]) {            /* i-th row */
5006         anzi = *(nextai[k] + 1) - *nextai[k];
5007         aj   = buf_rj[k] + *nextai[k];
5008         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5009         bnzi += nlnk;
5010         nextrow[k]++;
5011         nextai[k]++;
5012       }
5013     }
5014     if (len < bnzi) len = bnzi; /* =max(bnzi) */
5015 
5016     /* if free space is not available, make more free space */
5017     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
5018     /* copy data into free space, then initialize lnk */
5019     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
5020     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
5021 
5022     current_space->array += bnzi;
5023     current_space->local_used += bnzi;
5024     current_space->local_remaining -= bnzi;
5025 
5026     bi[i + 1] = bi[i] + bnzi;
5027   }
5028 
5029   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
5030 
5031   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
5032   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
5033   PetscCall(PetscLLDestroy(lnk, lnkbt));
5034 
5035   /* create symbolic parallel matrix B_mpi */
5036   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
5037   PetscCall(MatCreate(comm, &B_mpi));
5038   if (n == PETSC_DECIDE) {
5039     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
5040   } else {
5041     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
5042   }
5043   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
5044   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
5045   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5046   MatPreallocateEnd(dnz, onz);
5047   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
5048 
5049   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5050   B_mpi->assembled = PETSC_FALSE;
5051   merge->bi        = bi;
5052   merge->bj        = bj;
5053   merge->buf_ri    = buf_ri;
5054   merge->buf_rj    = buf_rj;
5055   merge->coi       = NULL;
5056   merge->coj       = NULL;
5057   merge->owners_co = NULL;
5058 
5059   PetscCall(PetscCommDestroy(&comm));
5060 
5061   /* attach the supporting struct to B_mpi for reuse */
5062   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5063   PetscCall(PetscContainerSetPointer(container, merge));
5064   PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5065   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5066   PetscCall(PetscContainerDestroy(&container));
5067   *mpimat = B_mpi;
5068 
5069   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5070   PetscFunctionReturn(PETSC_SUCCESS);
5071 }
5072 
5073 /*@C
5074   MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5075   matrices from each processor
5076 
5077   Collective
5078 
5079   Input Parameters:
+ comm   - the communicator the parallel matrix will live on
. seqmat - the input sequential matrix
5082 . m      - number of local rows (or `PETSC_DECIDE`)
5083 . n      - number of local columns (or `PETSC_DECIDE`)
5084 - scall  - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5085 
5086   Output Parameter:
5087 . mpimat - the parallel matrix generated
5088 
5089   Level: advanced
5090 
5091   Note:
5092   The dimensions of the sequential matrix in each processor MUST be the same.
5093   The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5094   destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat.
5095 
5096 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
5097 @*/
5098 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5099 {
5100   PetscMPIInt size;
5101 
5102   PetscFunctionBegin;
5103   PetscCallMPI(MPI_Comm_size(comm, &size));
5104   if (size == 1) {
5105     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5106     if (scall == MAT_INITIAL_MATRIX) {
5107       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5108     } else {
5109       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5110     }
5111     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5112     PetscFunctionReturn(PETSC_SUCCESS);
5113   }
5114   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5115   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5116   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5117   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5118   PetscFunctionReturn(PETSC_SUCCESS);
5119 }
5120 
5121 /*@
5122   MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix.
5123 
5124   Not Collective
5125 
5126   Input Parameter:
5127 . A - the matrix
5128 
5129   Output Parameter:
5130 . A_loc - the local sequential matrix generated
5131 
5132   Level: developer
5133 
5134   Notes:
  The matrix is created by taking `A`'s local rows and putting them into a sequential matrix
  with `mlocal` rows and `n` columns, where `mlocal` is obtained with `MatGetLocalSize()` and
  `n` is the global column count obtained with `MatGetSize()`
5138 
5139   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5140 
5141   For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.
5142 
5143   Destroy the matrix with `MatDestroy()`
5144 
5145 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5146 @*/
5147 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5148 {
5149   PetscBool mpi;
5150 
5151   PetscFunctionBegin;
5152   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5153   if (mpi) {
5154     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5155   } else {
5156     *A_loc = A;
5157     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5158   }
5159   PetscFunctionReturn(PETSC_SUCCESS);
5160 }
5161 
5162 /*@
5163   MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.
5164 
5165   Not Collective
5166 
5167   Input Parameters:
5168 + A     - the matrix
5169 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5170 
5171   Output Parameter:
5172 . A_loc - the local sequential matrix generated
5173 
5174   Level: developer
5175 
5176   Notes:
  The matrix is created by taking all `A`'s local rows and putting them into a sequential
  matrix with `mlocal` rows and `n` columns. `mlocal` is the row count obtained with
  `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.
5180 
5181   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5182 
5183   When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
5184   with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
5185   then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
5186   and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.
5187 
5188 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5189 @*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
  const PetscScalar *aa, *ba, *aav, *bav;
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  /* Accept any type whose name begins with "mpiaij" (derived types share the Mat_MPIAIJ layout) */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* Uniprocessor: the diagonal block already holds the entire matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  a  = (Mat_SeqAIJ *)(mpimat->A)->data;
  b  = (Mat_SeqAIJ *)(mpimat->B)->data;
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  /* aa/ba are cursors advanced through the value arrays below; aav/bav keep the
     base addresses needed by the Restore calls at the end */
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* Each result row merges row i of the diagonal block A and off-diagonal block B in
       ascending global column order: B-entries left of the diagonal block, then A, then
       the remaining B-entries. B's local columns are mapped to global via cmap (garray). */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal portion of A whose global columns lie left of the diagonal block */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break; /* garray is sorted, so the rest lie right of the diagonal block */
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A: shift local column indices by cstart to get global columns */
      for (j = 0; j < ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A right of the diagonal block (resume at jo) */
      for (j = jo; j < ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* Nonzero pattern is unchanged: only refill the value array, walking it in the same
       interleaved (B-left | A | B-right) order used when the matrix was first built */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5294 
5295 /*@
5296   MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5297   mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part
5298 
5299   Not Collective
5300 
5301   Input Parameters:
5302 + A     - the matrix
5303 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5304 
5305   Output Parameters:
5306 + glob  - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5307 - A_loc - the local sequential matrix generated
5308 
5309   Level: developer
5310 
5311   Note:
5312   This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal
5313   part, then those associated with the off-diagonal part (in its local ordering)
5314 
5315 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5316 @*/
5317 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5318 {
5319   Mat             Ao, Ad;
5320   const PetscInt *cmap;
5321   PetscMPIInt     size;
5322   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5323 
5324   PetscFunctionBegin;
5325   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5326   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5327   if (size == 1) {
5328     if (scall == MAT_INITIAL_MATRIX) {
5329       PetscCall(PetscObjectReference((PetscObject)Ad));
5330       *A_loc = Ad;
5331     } else if (scall == MAT_REUSE_MATRIX) {
5332       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5333     }
5334     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5335     PetscFunctionReturn(PETSC_SUCCESS);
5336   }
5337   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5338   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5339   if (f) {
5340     PetscCall((*f)(A, scall, glob, A_loc));
5341   } else {
5342     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5343     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5344     Mat_SeqAIJ        *c;
5345     PetscInt          *ai = a->i, *aj = a->j;
5346     PetscInt          *bi = b->i, *bj = b->j;
5347     PetscInt          *ci, *cj;
5348     const PetscScalar *aa, *ba;
5349     PetscScalar       *ca;
5350     PetscInt           i, j, am, dn, on;
5351 
5352     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5353     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5354     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5355     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5356     if (scall == MAT_INITIAL_MATRIX) {
5357       PetscInt k;
5358       PetscCall(PetscMalloc1(1 + am, &ci));
5359       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5360       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5361       ci[0] = 0;
5362       for (i = 0, k = 0; i < am; i++) {
5363         const PetscInt ncols_o = bi[i + 1] - bi[i];
5364         const PetscInt ncols_d = ai[i + 1] - ai[i];
5365         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5366         /* diagonal portion of A */
5367         for (j = 0; j < ncols_d; j++, k++) {
5368           cj[k] = *aj++;
5369           ca[k] = *aa++;
5370         }
5371         /* off-diagonal portion of A */
5372         for (j = 0; j < ncols_o; j++, k++) {
5373           cj[k] = dn + *bj++;
5374           ca[k] = *ba++;
5375         }
5376       }
5377       /* put together the new matrix */
5378       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5379       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5380       /* Since these are PETSc arrays, change flags to free them as necessary. */
5381       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5382       c->free_a  = PETSC_TRUE;
5383       c->free_ij = PETSC_TRUE;
5384       c->nonew   = 0;
5385       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5386     } else if (scall == MAT_REUSE_MATRIX) {
5387       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5388       for (i = 0; i < am; i++) {
5389         const PetscInt ncols_d = ai[i + 1] - ai[i];
5390         const PetscInt ncols_o = bi[i + 1] - bi[i];
5391         /* diagonal portion of A */
5392         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5393         /* off-diagonal portion of A */
5394         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5395       }
5396       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5397     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5398     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5399     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa));
5400     if (glob) {
5401       PetscInt cst, *gidx;
5402 
5403       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5404       PetscCall(PetscMalloc1(dn + on, &gidx));
5405       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5406       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5407       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5408     }
5409   }
5410   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5411   PetscFunctionReturn(PETSC_SUCCESS);
5412 }
5413 
5414 /*@C
5415   MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5416 
5417   Not Collective
5418 
5419   Input Parameters:
5420 + A     - the matrix
5421 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5422 . row   - index set of rows to extract (or `NULL`)
5423 - col   - index set of columns to extract (or `NULL`)
5424 
5425   Output Parameter:
5426 . A_loc - the local sequential matrix generated
5427 
5428   Level: developer
5429 
5430 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5431 @*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS          isrowa, iscola;
  Mat        *aloc;
  PetscBool   match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  if (!row) {
    /* default row set: all locally owned rows */
    start = A->rmap->rstart;
    end   = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* default column set: the locally nonzero columns in ascending global order --
       garray entries left of the diagonal block, the owned column range itself, then
       the remaining garray entries; garray is sorted, so one split point (imark) suffices */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices() expects an array of existing matrices when reusing */
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  if (!row) PetscCall(ISDestroy(&isrowa));
  if (!col) PetscCall(ISDestroy(&iscola));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5484 
/*
 * Create a sequential AIJ matrix based on row indices: all columns of a row are extracted
 * once the row is matched. Rows could be local or remote. The routine is designed to be
 * scalable in memory so that nothing is based on a global size.
 * */
static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
{
  Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth;
  PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt            owner;
  PetscSFNode           *iremote, *oiremote;
  const PetscInt        *lrowindices;
  PetscSF                sf, osf;
  PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar     *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  /* Per-row counts and running offsets are stored pairwise: [i*2+0] = diagonal block,
     [i*2+1] = off-diagonal block, so they can travel as one MPIU_2INT per row */
  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i = 0; i < plocalsize; i++) {
    /* diagonal */
    nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
    /* off-diagonal */
    nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* compute offsets so that we know the relative location of each row */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i = 0; i < nrows; i++) {
    /* pnnz[i] = total nonzeros in requested row i; ncol tracks the maximum (used as
       the column bound for the sequential matrix below) */
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol    = PetscMax(pnnz[i], ncol);
    /* diagonal */
    dntotalcols += nlcols[i * 2 + 0];
    /* off-diagonal */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* diagonal */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diagonal */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  /* Build two SF graphs at nonzero-entry granularity: one gathering the diagonal-block
     entries of each requested row, one gathering the off-diagonal-block entries; both
     scatter into the single contiguous value/column arrays of P_oth */
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off-diagonal */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++]      = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* off-diagonal */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* operate on the matrix internal data to save memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for diag matrix
     (P's own column arrays are temporarily modified in place and restored below) */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  /* Undo the in-place local->global conversion of the off-diagonal column indices */
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5658 
5659 /*
5660  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5661  * This supports MPIAIJ and MAIJ
5662  * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp;
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   *  and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that  a->garray is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      key = a->garray[i] / dof; /* for MAIJ, dof consecutive components belong to one node; the key is the node index */
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same key as the previous step; because garray is sorted,
           the duplicate must be the key inserted last, i.e. count-1 */
        mapping[i] = count - 1;
      }
    }
    /* map: for each off-diag column of A, the local row of P_oth it maps to */
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
    PetscCall(PetscHMapIGetSize(hamp, &htsize));
    PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
    /* rows: the sorted unique (node) row indices of P to extract */
    PetscCall(PetscCalloc1(htsize, &rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    PetscCall(PetscSortInt(htsize, rowindices));
    PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
    /* In case, the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that are attached to the matrix earlier.
     */
    const PetscScalar *pd_a, *po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
    PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
    /* Update values in place: broadcast P's diagonal and off-diagonal values into P_oth->a */
    PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5735 
5736 /*@C
  MatGetBrowsOfAcols - Returns an `IS` that contains the rows of `B` that are equal to the nonzero columns of local `A`
5738 
5739   Collective
5740 
5741   Input Parameters:
5742 + A     - the first matrix in `MATMPIAIJ` format
5743 . B     - the second matrix in `MATMPIAIJ` format
5744 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5745 
5746   Output Parameters:
5747 + rowb  - On input index sets of rows of B to extract (or `NULL`), modified on output
5748 . colb  - On input index sets of columns of B to extract (or `NULL`), modified on output
5749 - B_seq - the sequential matrix generated
5750 
5751   Level: developer
5752 
5753 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
5754 @*/
5755 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5756 {
5757   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5758   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5759   IS          isrowb, iscolb;
5760   Mat        *bseq = NULL;
5761 
5762   PetscFunctionBegin;
5763   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5764              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5765   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5766 
5767   if (scall == MAT_INITIAL_MATRIX) {
5768     start = A->cmap->rstart;
5769     cmap  = a->garray;
5770     nzA   = a->A->cmap->n;
5771     nzB   = a->B->cmap->n;
5772     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5773     ncols = 0;
5774     for (i = 0; i < nzB; i++) { /* row < local row index */
5775       if (cmap[i] < start) idx[ncols++] = cmap[i];
5776       else break;
5777     }
5778     imark = i;
5779     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5780     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5781     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5782     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5783   } else {
5784     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5785     isrowb = *rowb;
5786     iscolb = *colb;
5787     PetscCall(PetscMalloc1(1, &bseq));
5788     bseq[0] = *B_seq;
5789   }
5790   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5791   *B_seq = bseq[0];
5792   PetscCall(PetscFree(bseq));
5793   if (!rowb) {
5794     PetscCall(ISDestroy(&isrowb));
5795   } else {
5796     *rowb = isrowb;
5797   }
5798   if (!colb) {
5799     PetscCall(ISDestroy(&iscolb));
5800   } else {
5801     *colb = iscolb;
5802   }
5803   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5804   PetscFunctionReturn(PETSC_SUCCESS);
5805 }
5806 
5807 /*
5808     MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns
5809     of the OFF-DIAGONAL portion of local A
5810 
5811     Collective
5812 
5813    Input Parameters:
5814 +    A,B - the matrices in `MATMPIAIJ` format
5815 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5816 
   Output Parameters:
5818 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5819 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5820 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5821 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5822 
5823     Developer Note:
5824     This directly accesses information inside the VecScatter associated with the matrix-vector product
5825      for this matrix. This is not desirable..
5826 
5827     Level: developer
5828 
5829 */
PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
{
  Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *b_oth;
  VecScatter         ctx;
  MPI_Comm           comm;
  const PetscMPIInt *rprocs, *sprocs;
  const PetscInt    *srow, *rstarts, *sstarts;
  PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
  PetscInt           i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len;
  PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
  MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
  PetscMPIInt        size, tag, rank, nreqs;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  if (size == 1) {
    /* NOTE(review): the first two assignments below only modify the local parameter copies
       and have no effect on the caller; callers must detect this case via *B_oth == NULL.
       If the intent was to clear the caller's pointers it would need *startsj_s / *bufa_ptr — confirm. */
    startsj_s = NULL;
    bufa_ptr  = NULL;
    *B_oth    = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  ctx = a->Mvctx;
  tag = ((PetscObject)ctx)->tag; /* one tag serves all three message rounds; rounds are separated by MPI_Waitall below */

  PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
  /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
  PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
  PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
  PetscCall(PetscMalloc1(nreqs, &reqs));
  /* Single request array: first nrecvs entries for receives, remaining nsends for sends */
  rwaits = reqs;
  swaits = PetscSafePointerPlusOffset(reqs, nrecvs);

  if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; /* cannot reuse without the saved offsets/buffer */
  if (scall == MAT_INITIAL_MATRIX) {
    /* i-array (row lengths) */
    /*  post receives */
    if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
    for (i = 0; i < nrecvs; i++) {
      /* NOTE(review): offset here is rstarts[i]*rbs but the buffer is sized and later read
         with (rstarts[i]-rstarts[0])*rbs; the two agree only when rstarts[0]==0 — confirm */
      rowlen = rvalues + rstarts[i] * rbs;
      nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
      PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
    }

    /* pack the outgoing message */
    PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));

    sstartsj[0] = 0;
    rstartsj[0] = 0;
    len         = 0; /* total length of j or a array to be sent */
    if (nsends) {
      k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
      PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
    }
    for (i = 0; i < nsends; i++) {
      rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
      nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
      for (j = 0; j < nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l = 0; l < sbs; l++) {
          PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */

          rowlen[j * sbs + l] = ncols;

          len += ncols;
          PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
        }
        k++;
      }
      PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));

      sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
    PetscCall(PetscFree(svalues));

    /* allocate buffers for sending j and a arrays */
    PetscCall(PetscMalloc1(len + 1, &bufj));
    PetscCall(PetscMalloc1(len + 1, &bufa));

    /* create i-array of B_oth */
    PetscCall(PetscMalloc1(aBn + 2, &b_othi));

    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i = 0; i < nrecvs; i++) {
      rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
      nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
      for (j = 0; j < nrows; j++) {
        b_othi[k + 1] = b_othi[k] + rowlen[j];
        PetscCall(PetscIntSumError(rowlen[j], len, &len)); /* len += rowlen[j], with overflow check */
        k++;
      }
      rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }
    PetscCall(PetscFree(rvalues));

    /* allocate space for j and a arrays of B_oth */
    PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
    PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));

    /* j-array (column indices) */
    /*  post receives of j-array */
    for (i = 0; i < nrecvs; i++) {
      nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
      PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
    }

    /* pack the outgoing message j-array */
    if (nsends) k = sstarts[0];
    for (i = 0; i < nsends; i++) {
      nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
      bufJ  = bufj + sstartsj[i];
      for (j = 0; j < nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll = 0; ll < sbs; ll++) {
          PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
          for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
          PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
        }
      }
      PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
    }

    /* recvs and sends of j-array are completed */
    if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
  } else if (scall == MAT_REUSE_MATRIX) {
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ *)(*B_oth)->data; /* NOTE(review): b_oth is not read after this assignment in the reuse path */
    PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); /* NOTE(review): reached for an invalid MatReuse value; the message text looks copy-pasted — confirm */

  /* a-array (numerical values) */
  /*  post receives of a-array */
  for (i = 0; i < nrecvs; i++) {
    nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
    PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
  }

  /* pack the outgoing message a-array */
  if (nsends) k = sstarts[0];
  for (i = 0; i < nsends; i++) {
    nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
    bufA  = bufa + sstartsj[i];
    for (j = 0; j < nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll = 0; ll < sbs; ll++) {
        PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
        for (l = 0; l < ncols; l++) *bufA++ = vals[l];
        PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
      }
    }
    PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
  }
  /* recvs and sends of a-array are completed */
  if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
  PetscCall(PetscFree(reqs));

  if (scall == MAT_INITIAL_MATRIX) {
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    PetscCall(PetscFree(bufj));
    if (!startsj_s || !bufa_ptr) {
      PetscCall(PetscFree2(sstartsj, rstartsj));
      /* NOTE(review): this frees the parameter pointer bufa_ptr (a no-op when it is NULL),
         not the allocated buffer bufa, which appears to leak here — presumably this was
         meant to be PetscFree(bufa); confirm */
      PetscCall(PetscFree(bufa_ptr));
    } else {
      /* save offsets and send buffer so a later MAT_REUSE_MATRIX call can skip the i/j rounds */
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  } else if (scall == MAT_REUSE_MATRIX) {
    PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
  }

  PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
  PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
6029 
6030 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
6031 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
6032 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
6033 #if defined(PETSC_HAVE_MKL_SPARSE)
6034 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
6035 #endif
6036 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
6037 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
6038 #if defined(PETSC_HAVE_ELEMENTAL)
6039 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
6040 #endif
6041 #if defined(PETSC_HAVE_SCALAPACK)
6042 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6043 #endif
6044 #if defined(PETSC_HAVE_HYPRE)
6045 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
6046 #endif
6047 #if defined(PETSC_HAVE_CUDA)
6048 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
6049 #endif
6050 #if defined(PETSC_HAVE_HIP)
6051 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
6052 #endif
6053 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6054 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6055 #endif
6056 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6057 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6058 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6059 
6060 /*
6061     Computes (B'*A')' since computing B*A directly is untenable
6062 
6063                n                       p                          p
6064         [             ]       [             ]         [                 ]
6065       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6066         [             ]       [             ]         [                 ]
6067 
6068 */
6069 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6070 {
6071   Mat At, Bt, Ct;
6072 
6073   PetscFunctionBegin;
6074   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6075   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6076   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
6077   PetscCall(MatDestroy(&At));
6078   PetscCall(MatDestroy(&Bt));
6079   PetscCall(MatTransposeSetPrecursor(Ct, C));
6080   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6081   PetscCall(MatDestroy(&Ct));
6082   PetscFunctionReturn(PETSC_SUCCESS);
6083 }
6084 
6085 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6086 {
6087   PetscBool cisdense;
6088 
6089   PetscFunctionBegin;
6090   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6091   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6092   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6093   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6094   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6095   PetscCall(MatSetUp(C));
6096 
6097   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6098   PetscFunctionReturn(PETSC_SUCCESS);
6099 }
6100 
6101 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6102 {
6103   Mat_Product *product = C->product;
6104   Mat          A = product->A, B = product->B;
6105 
6106   PetscFunctionBegin;
6107   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
6108              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6109   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6110   C->ops->productsymbolic = MatProductSymbolic_AB;
6111   PetscFunctionReturn(PETSC_SUCCESS);
6112 }
6113 
6114 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6115 {
6116   Mat_Product *product = C->product;
6117 
6118   PetscFunctionBegin;
6119   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6120   PetscFunctionReturn(PETSC_SUCCESS);
6121 }
6122 
6123 /*
6124    Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6125 
6126   Input Parameters:
6127 
6128     j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
6129     j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)
6130 
6131     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6132 
6133     For Set1, j1[] contains column indices of the nonzeros.
6134     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6136     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6137 
6138     Similar for Set2.
6139 
6140     This routine merges the two sets of nonzeros row by row and removes repeats.
6141 
6142   Output Parameters: (memory is allocated by the caller)
6143 
6144     i[],j[]: the CSR of the merged matrix, which has m rows.
6145     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6146     imap2[]: similar to imap1[], but for Set2.
6147     Note we order nonzeros row-by-row and from left to right.
6148 */
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m; /* Row index of mat */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros of Set1, Set2 and the merged set respectively */
  i[0]        = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    /* Classic two-way merge while both sets still have entries in this row */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump over the repeats of this unique nonzero in Set1 */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump over the repeats of this unique nonzero in Set2 */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) { /* Nonzero of Set1 comes first */
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else { /* Nonzero of Set2 comes first */
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    i[r + 1] = t; /* CSR row pointer of the merged matrix */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
6206 
6207 /*
6208   Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6209 
6210   Input Parameters:
6211     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6212     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6213       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6214 
6215       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6216       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6217 
6218   Output Parameters:
6219     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6220     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6221       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6222       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6223 
6224     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6225       Atot: number of entries belonging to the diagonal block.
6226       Annz: number of unique nonzeros belonging to the diagonal block.
6227       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6228         repeats (i.e., same 'i,j' pair).
6229       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6230         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6231 
6232       Atot: number of entries belonging to the diagonal block
6233       Annz: number of unique nonzeros belonging to the diagonal block.
6234 
6235     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6236 
6237     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6238 */
6239 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
6240 {
6241   PetscInt    cstart, cend, rstart, rend, row, col;
6242   PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6243   PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6244   PetscCount  k, m, p, q, r, s, mid;
6245   PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;
6246 
6247   PetscFunctionBegin;
6248   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6249   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6250   m = rend - rstart;
6251 
6252   /* Skip negative rows */
6253   for (k = 0; k < n; k++)
6254     if (i[k] >= 0) break;
6255 
6256   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6257      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6258   */
6259   while (k < n) {
6260     row = i[k];
6261     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6262     for (s = k; s < n; s++)
6263       if (i[s] != row) break;
6264 
6265     /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */
6266     for (p = k; p < s; p++) {
6267       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT;
6268       else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
6269     }
6270     PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
6271     PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6272     rowBegin[row - rstart] = k;
6273     rowMid[row - rstart]   = mid;
6274     rowEnd[row - rstart]   = s;
6275 
6276     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6277     Atot += mid - k;
6278     Btot += s - mid;
6279 
6280     /* Count unique nonzeros of this diag row */
6281     for (p = k; p < mid;) {
6282       col = j[p];
6283       do {
6284         j[p] += PETSC_MAX_INT; /* Revert the modified diagonal indices */
6285         p++;
6286       } while (p < mid && j[p] == col);
6287       Annz++;
6288     }
6289 
6290     /* Count unique nonzeros of this offdiag row */
6291     for (p = mid; p < s;) {
6292       col = j[p];
6293       do {
6294         p++;
6295       } while (p < s && j[p] == col);
6296       Bnnz++;
6297     }
6298     k = s;
6299   }
6300 
6301   /* Allocation according to Atot, Btot, Annz, Bnnz */
6302   PetscCall(PetscMalloc1(Atot, &Aperm));
6303   PetscCall(PetscMalloc1(Btot, &Bperm));
6304   PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
6305   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));
6306 
6307   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6308   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6309   for (r = 0; r < m; r++) {
6310     k   = rowBegin[r];
6311     mid = rowMid[r];
6312     s   = rowEnd[r];
6313     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
6314     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
6315     Atot += mid - k;
6316     Btot += s - mid;
6317 
6318     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6319     for (p = k; p < mid;) {
6320       col = j[p];
6321       q   = p;
6322       do {
6323         p++;
6324       } while (p < mid && j[p] == col);
6325       Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
6326       Annz++;
6327     }
6328 
6329     for (p = mid; p < s;) {
6330       col = j[p];
6331       q   = p;
6332       do {
6333         p++;
6334       } while (p < s && j[p] == col);
6335       Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
6336       Bnnz++;
6337     }
6338   }
6339   /* Output */
6340   *Aperm_ = Aperm;
6341   *Annz_  = Annz;
6342   *Atot_  = Atot;
6343   *Ajmap_ = Ajmap;
6344   *Bperm_ = Bperm;
6345   *Bnnz_  = Bnnz;
6346   *Btot_  = Btot;
6347   *Bjmap_ = Bjmap;
6348   PetscFunctionReturn(PETSC_SUCCESS);
6349 }
6350 
6351 /*
6352   Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6353 
6354   Input Parameters:
6355     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6356     nnz:  number of unique nonzeros in the merged matrix
6357     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6358     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6359 
6360   Output Parameter: (memory is allocated by the caller)
6361     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6362 
6363   Example:
6364     nnz1 = 4
6365     nnz  = 6
6366     imap = [1,3,4,5]
6367     jmap = [0,3,5,6,7]
6368    then,
6369     jmap_new = [0,0,3,3,5,6,7]
6370 */
6371 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6372 {
6373   PetscCount k, p;
6374 
6375   PetscFunctionBegin;
6376   jmap_new[0] = 0;
6377   p           = nnz;                /* p loops over jmap_new[] backwards */
6378   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6379     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6380   }
6381   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6382   PetscFunctionReturn(PETSC_SUCCESS);
6383 }
6384 
6385 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data)
6386 {
6387   MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data;
6388 
6389   PetscFunctionBegin;
6390   PetscCall(PetscSFDestroy(&coo->sf));
6391   PetscCall(PetscFree(coo->Aperm1));
6392   PetscCall(PetscFree(coo->Bperm1));
6393   PetscCall(PetscFree(coo->Ajmap1));
6394   PetscCall(PetscFree(coo->Bjmap1));
6395   PetscCall(PetscFree(coo->Aimap2));
6396   PetscCall(PetscFree(coo->Bimap2));
6397   PetscCall(PetscFree(coo->Aperm2));
6398   PetscCall(PetscFree(coo->Bperm2));
6399   PetscCall(PetscFree(coo->Ajmap2));
6400   PetscCall(PetscFree(coo->Bjmap2));
6401   PetscCall(PetscFree(coo->Cperm1));
6402   PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
6403   PetscCall(PetscFree(coo));
6404   PetscFunctionReturn(PETSC_SUCCESS);
6405 }
6406 
6407 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6408 {
6409   MPI_Comm             comm;
6410   PetscMPIInt          rank, size;
6411   PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6412   PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
6413   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6414   PetscContainer       container;
6415   MatCOOStruct_MPIAIJ *coo;
6416 
6417   PetscFunctionBegin;
6418   PetscCall(PetscFree(mpiaij->garray));
6419   PetscCall(VecDestroy(&mpiaij->lvec));
6420 #if defined(PETSC_USE_CTABLE)
6421   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6422 #else
6423   PetscCall(PetscFree(mpiaij->colmap));
6424 #endif
6425   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6426   mat->assembled     = PETSC_FALSE;
6427   mat->was_assembled = PETSC_FALSE;
6428 
6429   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6430   PetscCallMPI(MPI_Comm_size(comm, &size));
6431   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6432   PetscCall(PetscLayoutSetUp(mat->rmap));
6433   PetscCall(PetscLayoutSetUp(mat->cmap));
6434   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6435   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6436   PetscCall(MatGetLocalSize(mat, &m, &n));
6437   PetscCall(MatGetSize(mat, &M, &N));
6438 
6439   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6440   /* entries come first, then local rows, then remote rows.                     */
6441   PetscCount n1 = coo_n, *perm1;
6442   PetscInt  *i1 = coo_i, *j1 = coo_j;
6443 
6444   PetscCall(PetscMalloc1(n1, &perm1));
6445   for (k = 0; k < n1; k++) perm1[k] = k;
6446 
6447   /* Manipulate indices so that entries with negative row or col indices will have smallest
6448      row indices, local entries will have greater but negative row indices, and remote entries
6449      will have positive row indices.
6450   */
6451   for (k = 0; k < n1; k++) {
6452     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT;                /* e.g., -2^31, minimal to move them ahead */
6453     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6454     else {
6455       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6456       if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6457     }
6458   }
6459 
6460   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6461   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6462 
6463   /* Advance k to the first entry we need to take care of */
6464   for (k = 0; k < n1; k++)
6465     if (i1[k] > PETSC_MIN_INT) break;
6466   PetscInt i1start = k;
6467 
6468   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */
6469   for (; k < rem; k++) i1[k] += PETSC_MAX_INT;                                    /* Revert row indices of local rows*/
6470 
6471   /*           Send remote rows to their owner                                  */
6472   /* Find which rows should be sent to which remote ranks*/
6473   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6474   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6475   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6476   const PetscInt *ranges;
6477   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6478 
6479   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6480   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6481   for (k = rem; k < n1;) {
6482     PetscMPIInt owner;
6483     PetscInt    firstRow, lastRow;
6484 
6485     /* Locate a row range */
6486     firstRow = i1[k]; /* first row of this owner */
6487     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6488     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6489 
6490     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6491     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6492 
6493     /* All entries in [k,p) belong to this remote owner */
6494     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6495       PetscMPIInt *sendto2;
6496       PetscInt    *nentries2;
6497       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6498 
6499       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6500       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6501       PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1));
6502       PetscCall(PetscFree2(sendto, nentries2));
6503       sendto   = sendto2;
6504       nentries = nentries2;
6505       maxNsend = maxNsend2;
6506     }
6507     sendto[nsend]   = owner;
6508     nentries[nsend] = p - k;
6509     PetscCall(PetscCountCast(p - k, &nentries[nsend]));
6510     nsend++;
6511     k = p;
6512   }
6513 
6514   /* Build 1st SF to know offsets on remote to send data */
6515   PetscSF      sf1;
6516   PetscInt     nroots = 1, nroots2 = 0;
6517   PetscInt     nleaves = nsend, nleaves2 = 0;
6518   PetscInt    *offsets;
6519   PetscSFNode *iremote;
6520 
6521   PetscCall(PetscSFCreate(comm, &sf1));
6522   PetscCall(PetscMalloc1(nsend, &iremote));
6523   PetscCall(PetscMalloc1(nsend, &offsets));
6524   for (k = 0; k < nsend; k++) {
6525     iremote[k].rank  = sendto[k];
6526     iremote[k].index = 0;
6527     nleaves2 += nentries[k];
6528     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6529   }
6530   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6531   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6532   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6533   PetscCall(PetscSFDestroy(&sf1));
6534   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem);
6535 
6536   /* Build 2nd SF to send remote COOs to their owner */
6537   PetscSF sf2;
6538   nroots  = nroots2;
6539   nleaves = nleaves2;
6540   PetscCall(PetscSFCreate(comm, &sf2));
6541   PetscCall(PetscSFSetFromOptions(sf2));
6542   PetscCall(PetscMalloc1(nleaves, &iremote));
6543   p = 0;
6544   for (k = 0; k < nsend; k++) {
6545     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6546     for (q = 0; q < nentries[k]; q++, p++) {
6547       iremote[p].rank  = sendto[k];
6548       iremote[p].index = offsets[k] + q;
6549     }
6550   }
6551   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6552 
6553   /* Send the remote COOs to their owner */
6554   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6555   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6556   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6557   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6558   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE));
6559   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6560   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE));
6561 
6562   PetscCall(PetscFree(offsets));
6563   PetscCall(PetscFree2(sendto, nentries));
6564 
6565   /* Sort received COOs by row along with the permutation array     */
6566   for (k = 0; k < n2; k++) perm2[k] = k;
6567   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6568 
6569   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6570   PetscCount *Cperm1;
6571   PetscCall(PetscMalloc1(nleaves, &Cperm1));
6572   PetscCall(PetscArraycpy(Cperm1, perm1 + rem, nleaves));
6573 
6574   /* Support for HYPRE matrices, kind of a hack.
6575      Swap min column with diagonal so that diagonal values will go first */
6576   PetscBool   hypre;
6577   const char *name;
6578   PetscCall(PetscObjectGetName((PetscObject)mat, &name));
6579   PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", name, &hypre));
6580   if (hypre) {
6581     PetscInt *minj;
6582     PetscBT   hasdiag;
6583 
6584     PetscCall(PetscBTCreate(m, &hasdiag));
6585     PetscCall(PetscMalloc1(m, &minj));
6586     for (k = 0; k < m; k++) minj[k] = PETSC_MAX_INT;
6587     for (k = i1start; k < rem; k++) {
6588       if (j1[k] < cstart || j1[k] >= cend) continue;
6589       const PetscInt rindex = i1[k] - rstart;
6590       if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6591       minj[rindex] = PetscMin(minj[rindex], j1[k]);
6592     }
6593     for (k = 0; k < n2; k++) {
6594       if (j2[k] < cstart || j2[k] >= cend) continue;
6595       const PetscInt rindex = i2[k] - rstart;
6596       if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6597       minj[rindex] = PetscMin(minj[rindex], j2[k]);
6598     }
6599     for (k = i1start; k < rem; k++) {
6600       const PetscInt rindex = i1[k] - rstart;
6601       if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6602       if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart);
6603       else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex];
6604     }
6605     for (k = 0; k < n2; k++) {
6606       const PetscInt rindex = i2[k] - rstart;
6607       if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6608       if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart);
6609       else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex];
6610     }
6611     PetscCall(PetscBTDestroy(&hasdiag));
6612     PetscCall(PetscFree(minj));
6613   }
6614 
6615   /* Split local COOs and received COOs into diag/offdiag portions */
6616   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6617   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1;
6618   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6619   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6620   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6621   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6622 
6623   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6624   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6625   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6626   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6627 
6628   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6629   PetscInt *Ai, *Bi;
6630   PetscInt *Aj, *Bj;
6631 
6632   PetscCall(PetscMalloc1(m + 1, &Ai));
6633   PetscCall(PetscMalloc1(m + 1, &Bi));
6634   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6635   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6636 
6637   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6638   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6639   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6640   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6641   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6642 
6643   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6644   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6645 
6646   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6647   /* expect nonzeros in A/B most likely have local contributing entries        */
6648   PetscInt    Annz = Ai[m];
6649   PetscInt    Bnnz = Bi[m];
6650   PetscCount *Ajmap1_new, *Bjmap1_new;
6651 
6652   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6653   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6654 
6655   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6656   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6657 
6658   PetscCall(PetscFree(Aimap1));
6659   PetscCall(PetscFree(Ajmap1));
6660   PetscCall(PetscFree(Bimap1));
6661   PetscCall(PetscFree(Bjmap1));
6662   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6663   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6664   PetscCall(PetscFree(perm1));
6665   PetscCall(PetscFree3(i2, j2, perm2));
6666 
6667   Ajmap1 = Ajmap1_new;
6668   Bjmap1 = Bjmap1_new;
6669 
6670   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6671   if (Annz < Annz1 + Annz2) {
6672     PetscInt *Aj_new;
6673     PetscCall(PetscMalloc1(Annz, &Aj_new));
6674     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6675     PetscCall(PetscFree(Aj));
6676     Aj = Aj_new;
6677   }
6678 
6679   if (Bnnz < Bnnz1 + Bnnz2) {
6680     PetscInt *Bj_new;
6681     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6682     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6683     PetscCall(PetscFree(Bj));
6684     Bj = Bj_new;
6685   }
6686 
6687   /* Create new submatrices for on-process and off-process coupling                  */
6688   PetscScalar     *Aa, *Ba;
6689   MatType          rtype;
6690   Mat_SeqAIJ      *a, *b;
6691   PetscObjectState state;
6692   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6693   PetscCall(PetscCalloc1(Bnnz, &Ba));
6694   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6695   if (cstart) {
6696     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6697   }
6698   PetscCall(MatDestroy(&mpiaij->A));
6699   PetscCall(MatDestroy(&mpiaij->B));
6700   PetscCall(MatGetRootType_Private(mat, &rtype));
6701   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6702   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6703   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6704   mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ
6705   state              = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate;
6706   PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
6707 
6708   a               = (Mat_SeqAIJ *)mpiaij->A->data;
6709   b               = (Mat_SeqAIJ *)mpiaij->B->data;
6710   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6711   a->free_a = b->free_a = PETSC_TRUE;
6712   a->free_ij = b->free_ij = PETSC_TRUE;
6713 
6714   /* conversion must happen AFTER multiply setup */
6715   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6716   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6717   PetscCall(VecDestroy(&mpiaij->lvec));
6718   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6719 
6720   // Put the COO struct in a container and then attach that to the matrix
6721   PetscCall(PetscMalloc1(1, &coo));
6722   coo->n       = coo_n;
6723   coo->sf      = sf2;
6724   coo->sendlen = nleaves;
6725   coo->recvlen = nroots;
6726   coo->Annz    = Annz;
6727   coo->Bnnz    = Bnnz;
6728   coo->Annz2   = Annz2;
6729   coo->Bnnz2   = Bnnz2;
6730   coo->Atot1   = Atot1;
6731   coo->Atot2   = Atot2;
6732   coo->Btot1   = Btot1;
6733   coo->Btot2   = Btot2;
6734   coo->Ajmap1  = Ajmap1;
6735   coo->Aperm1  = Aperm1;
6736   coo->Bjmap1  = Bjmap1;
6737   coo->Bperm1  = Bperm1;
6738   coo->Aimap2  = Aimap2;
6739   coo->Ajmap2  = Ajmap2;
6740   coo->Aperm2  = Aperm2;
6741   coo->Bimap2  = Bimap2;
6742   coo->Bjmap2  = Bjmap2;
6743   coo->Bperm2  = Bperm2;
6744   coo->Cperm1  = Cperm1;
6745   // Allocate in preallocation. If not used, it has zero cost on host
6746   PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
6747   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
6748   PetscCall(PetscContainerSetPointer(container, coo));
6749   PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ));
6750   PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
6751   PetscCall(PetscContainerDestroy(&container));
6752   PetscFunctionReturn(PETSC_SUCCESS);
6753 }
6754 
6755 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6756 {
6757   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6758   Mat                  A = mpiaij->A, B = mpiaij->B;
6759   PetscScalar         *Aa, *Ba;
6760   PetscScalar         *sendbuf, *recvbuf;
6761   const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
6762   const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
6763   const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
6764   const PetscCount    *Cperm1;
6765   PetscContainer       container;
6766   MatCOOStruct_MPIAIJ *coo;
6767 
6768   PetscFunctionBegin;
6769   PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
6770   PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix");
6771   PetscCall(PetscContainerGetPointer(container, (void **)&coo));
6772   sendbuf = coo->sendbuf;
6773   recvbuf = coo->recvbuf;
6774   Ajmap1  = coo->Ajmap1;
6775   Ajmap2  = coo->Ajmap2;
6776   Aimap2  = coo->Aimap2;
6777   Bjmap1  = coo->Bjmap1;
6778   Bjmap2  = coo->Bjmap2;
6779   Bimap2  = coo->Bimap2;
6780   Aperm1  = coo->Aperm1;
6781   Aperm2  = coo->Aperm2;
6782   Bperm1  = coo->Bperm1;
6783   Bperm2  = coo->Bperm2;
6784   Cperm1  = coo->Cperm1;
6785 
6786   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6787   PetscCall(MatSeqAIJGetArray(B, &Ba));
6788 
6789   /* Pack entries to be sent to remote */
6790   for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6791 
6792   /* Send remote entries to their owner and overlap the communication with local computation */
6793   PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6794   /* Add local entries to A and B */
6795   for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6796     PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
6797     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6798     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6799   }
6800   for (PetscCount i = 0; i < coo->Bnnz; i++) {
6801     PetscScalar sum = 0.0;
6802     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6803     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6804   }
6805   PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6806 
6807   /* Add received remote entries to A and B */
6808   for (PetscCount i = 0; i < coo->Annz2; i++) {
6809     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6810   }
6811   for (PetscCount i = 0; i < coo->Bnnz2; i++) {
6812     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6813   }
6814   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6815   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6816   PetscFunctionReturn(PETSC_SUCCESS);
6817 }
6818 
6819 /*MC
6820    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6821 
6822    Options Database Keys:
6823 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6824 
6825    Level: beginner
6826 
6827    Notes:
6828    `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values,
6829     in this case the values associated with the rows and columns one passes in are set to zero
6830     in the matrix
6831 
    `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
6834 
6835 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6836 M*/
/* Constructor for MATMPIAIJ: sets up the per-matrix data structure, installs the
   function table, and registers the named hooks other parts of PETSc query at runtime */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  PetscCall(PetscNew(&b));
  B->data       = (void *)b;
  B->ops[0]     = MatOps_Values; /* struct copy of the MATMPIAIJ operation table */
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* Named hooks queried elsewhere via PetscObjectQueryFunction() */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  /* Conversion routines; the guarded ones are registered only when PETSc was configured with the package */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  /* MatProduct and COO assembly hooks */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(PETSC_SUCCESS);
}
6919 
6920 /*@C
6921   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6922   and "off-diagonal" part of the matrix in CSR format.
6923 
6924   Collective
6925 
6926   Input Parameters:
6927 + comm - MPI communicator
6928 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
6929 . n    - This value should be the same as the local size used in creating the
6930        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
6931        calculated if `N` is given) For square matrices `n` is almost always `m`.
6932 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
6933 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
6934 . i    - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6935 . j    - column indices, which must be local, i.e., based off the start column of the diagonal portion
6936 . a    - matrix values
6937 . oi   - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6938 . oj   - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
6939 - oa   - matrix values
6940 
6941   Output Parameter:
6942 . mat - the matrix
6943 
6944   Level: advanced
6945 
6946   Notes:
6947   The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6948   must free the arrays once the matrix has been destroyed and not before.
6949 
6950   The `i` and `j` indices are 0 based
6951 
6952   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix
6953 
6954   This sets local rows and cannot be used to set off-processor values.
6955 
6956   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6957   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6958   not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6959   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6960   keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6961   communication if it is known that only local entries will be set.
6962 
6963 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6964           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6965 @*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
{
  Mat_MPIAIJ *maij;

  PetscFunctionBegin;
  /* the local row count must be known up front because i[] and oi[] are indexed by local row */
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  /* the caller-supplied CSR arrays fully describe the nonzero structure, so no preallocation pass is needed */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* wrap the caller's arrays (MatCreateSeqAIJWithArrays() does not copy them) as the
     diagonal (A) and off-diagonal (B) sequential blocks; B keeps global column ids,
     hence its column size is the global column count */
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));

  /* every entry is already on the owning process, so disable stash communication during assembly */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  /* the user-provided sparsity pattern is fixed: error out on insertion at a new nonzero location */
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
6994 
/* Backend data attached to C->product for MPIAIJ matrix products (AB, AtB, PtAP):
   the parallel product is decomposed into a small set of sequential intermediate
   products mp[], whose nonzeros are then assembled into C via COO insertion
   (see MatProductSymbolic_MPIAIJBACKEND / MatProductNumeric_MPIAIJBACKEND) */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt   cp;    /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r;
  PetscScalar *bufa;
  Mat          P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;            /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;         /* memory type (host/CUDA/HIP/Kokkos) used for the COO value buffers */

  /* customization */
  PetscBool abmerge;    /* for AB: multiply A's diagonal block against B's merged local matrix instead of its two blocks */
  PetscBool P_oth_bind; /* if true, bind P_oth to the CPU */
} MatMatMPIAIJBACKEND;
7025 
7026 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
7027 {
7028   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
7029   PetscInt             i;
7030 
7031   PetscFunctionBegin;
7032   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
7033   PetscCall(PetscFree(mmdata->bufa));
7034   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
7035   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
7036   PetscCall(MatDestroy(&mmdata->P_oth));
7037   PetscCall(MatDestroy(&mmdata->Bloc));
7038   PetscCall(PetscSFDestroy(&mmdata->sf));
7039   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
7040   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
7041   PetscCall(PetscFree(mmdata->own[0]));
7042   PetscCall(PetscFree(mmdata->own));
7043   PetscCall(PetscFree(mmdata->off[0]));
7044   PetscCall(PetscFree(mmdata->off));
7045   PetscCall(PetscFree(mmdata));
7046   PetscFunctionReturn(PETSC_SUCCESS);
7047 }
7048 
7049 /* Copy selected n entries with indices in idx[] of A to v[].
7050    If idx is NULL, copy the whole data array of A to v[]
7051  */
7052 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
7053 {
7054   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
7055 
7056   PetscFunctionBegin;
7057   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7058   if (f) {
7059     PetscCall((*f)(A, n, idx, v));
7060   } else {
7061     const PetscScalar *vv;
7062 
7063     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7064     if (n && idx) {
7065       PetscScalar    *w  = v;
7066       const PetscInt *oi = idx;
7067       PetscInt        j;
7068 
7069       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7070     } else {
7071       PetscCall(PetscArraycpy(v, vv, n));
7072     }
7073     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7074   }
7075   PetscFunctionReturn(PETSC_SUCCESS);
7076 }
7077 
/* Numeric phase of the backend product: recompute the sequential intermediate
   products, gather their nonzero values into the COO buffers laid out by the
   symbolic phase, scatter off-process contributions, and insert into C */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o; /* running offsets into coo_v (on-process) and coo_w (off-process) */

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  /* the symbolic-phase values can be reused at most once; later calls must refresh the temporaries */
  mmdata->reusesym = PETSC_FALSE;

  /* run the numeric phase of each intermediate sequential product */
  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* collect values: off-process entries (indices in off[]) go to the send buffer coo_w,
     on-process entries go to coo_v; temporaries (mptmp) feed later products and are skipped */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i + 1] - mmdata->off[i];

    if (mmdata->mptmp[i]) continue;
    if (noff) {
      PetscInt nown = mmdata->own[i + 1] - mmdata->own[i];

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;

      /* no off-process entries for this product: copy its whole value array */
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    /* received remote values land right after the locally produced ones, matching the
       coo_i/coo_j layout built in the symbolic phase */
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
  }
  PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7122 
/* Support for Pt * A, A * P, or Pt * A * P */
#define MAX_NUMBER_INTERMEDIATE 4
/* Symbolic phase of the backend product algorithm for MPIAIJ(-derived) matrices.

   The parallel product C is decomposed into at most MAX_NUMBER_INTERMEDIATE
   sequential products mp[] over the local diagonal/off-diagonal blocks (plus the
   gathered off-process rows P_oth where needed). For each mp[], a row/column map
   (rmapt/cmapt with tables rmapa/cmapa) translates its local indices to global
   indices of C. The (i,j) coordinates of every nonzero are precomputed here and C
   is preallocated with MatSetPreallocationCOO(); entries owned by other processes
   are routed through a PetscSF. The numeric phase then only copies values. */
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product           *product = C->product;
  Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ            *a, *p;
  MatMatMPIAIJBACKEND   *mmdata;
  ISLocalToGlobalMapping P_oth_l2g = NULL;
  IS                     glob      = NULL;
  const char            *prefix;
  char                   pprefix[256];
  const PetscInt        *globidx, *P_oth_idx;
  PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
  PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
  PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
                                                                                         /* a base offset; type-2: sparse with a local to global map table */
  const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */

  MatProductType ptype;
  PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
  PetscMPIInt    size;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  ptype = product->type;
  /* for symmetric A, compute A^t*B as A*B; record that symmetry was exploited */
  if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
    ptype                                          = MATPRODUCT_AB;
    product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
  }
  /* identify the operands and the local/global sizes of C for each product type */
  switch (ptype) {
  case MATPRODUCT_AB:
    A          = product->A;
    P          = product->B;
    m          = A->rmap->n;
    n          = P->cmap->n;
    M          = A->rmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
    break;
  case MATPRODUCT_AtB:
    P          = product->A;
    A          = product->B;
    m          = P->cmap->n;
    n          = A->cmap->n;
    M          = P->cmap->N;
    N          = A->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  case MATPRODUCT_PtAP:
    A          = product->A;
    P          = product->B;
    m          = P->cmap->n;
    n          = P->cmap->n;
    M          = P->cmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
  if (size == 1) hasoffproc = PETSC_FALSE; /* uniprocessor: everything is local */

  /* defaults */
  for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
    mp[i]    = NULL;
    mptmp[i] = PETSC_FALSE;
    rmapt[i] = -1;
    cmapt[i] = -1;
    rmapa[i] = NULL;
    cmapa[i] = NULL;
  }

  /* customization: option names depend on whether the user called the MatXXXMult API
     or the generic MatProduct API */
  PetscCall(PetscNew(&mmdata));
  mmdata->reusesym = product->api_user;
  if (ptype == MATPRODUCT_AB) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
      PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  } else if (ptype == MATPRODUCT_PtAP) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
      PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  }
  a = (Mat_MPIAIJ *)A->data;
  p = (Mat_MPIAIJ *)P->data;
  PetscCall(MatSetSizes(C, m, n, M, N));
  PetscCall(PetscLayoutSetUp(C->rmap));
  PetscCall(PetscLayoutSetUp(C->cmap));
  PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatGetOptionsPrefix(C, &prefix));

  /* build the intermediate products; cp counts them, and rmapt/cmapt(+tables) record
     how each product's rows/columns map to global indices of C */
  cp = 0;
  switch (ptype) {
  case MATPRODUCT_AB: /* A * P */
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));

    /* A_diag * P_local (merged or not) */
    if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
      /* P is product->B */
      PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
      PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else { /* A_diag * P_diag and A_diag * P_off */
      PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 1;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = p->garray; /* off-diagonal block columns map through P's garray */
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }

    /* A_off * P_other */
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;

  case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
    /* A is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
      PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else {
      PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = p->garray; /* rows of P_off^t * A land on rows owned by other processes */
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  case MATPRODUCT_PtAP:
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    /* P is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
    PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
    PetscCall(MatProductSetFill(mp[cp], product->fill));
    PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
    PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
    PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
    mp[cp]->product->api_user = product->api_user;
    PetscCall(MatProductSetFromOptions(mp[cp]));
    PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
    PetscCall(ISGetIndices(glob, &globidx));
    rmapt[cp] = 2;
    rmapa[cp] = globidx;
    cmapt[cp] = 2;
    cmapa[cp] = globidx;
    mptmp[cp] = PETSC_FALSE;
    cp++;
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      /* A_off * P_oth is only an input to the next product, hence marked temporary */
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      mptmp[cp] = PETSC_TRUE;
      cp++;
      /* P_loc^t * (A_off * P_oth), using the temporary product mp[1] */
      PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  /* sanity check */
  if (size > 1)
    for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);

  PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
  for (i = 0; i < cp; i++) {
    mmdata->mp[i]    = mp[i];
    mmdata->mptmp[i] = mptmp[i];
  }
  mmdata->cp             = cp;
  C->product->data       = mmdata;
  C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
  C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;

  /* memory type */
  mmdata->mtype = PETSC_MEMTYPE_HOST;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
  if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
  else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
  else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;

  /* prepare coo coordinates for values insertion */

  /* count total nonzeros of those intermediate seqaij Mats
    ncoo_d:    # of nonzeros of matrices that do not have offproc entries
    ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
    ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
  */
  for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
    if (mptmp[cp]) continue;
    if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
      const PetscInt *rmap = rmapa[cp];
      const PetscInt  mr   = mp[cp]->rmap->n;
      const PetscInt  rs   = C->rmap->rstart;
      const PetscInt  re   = C->rmap->rend;
      const PetscInt *ii   = mm->i;
      for (i = 0; i < mr; i++) {
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i + 1] - ii[i];
        if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
        else ncoo_oown += nz;                  /* this row is local */
      }
    } else ncoo_d += mm->nz;
  }

  /*
    ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc

    ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.

    off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].

    off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
    own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
    so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.

    coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
    Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
  */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));

  /* gather (i,j) of nonzeros inserted by remote procs */
  if (hasoffproc) {
    PetscSF  msf;
    PetscInt ncoo2, *coo_i2, *coo_j2;

    PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
    PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
    PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */

    /* walk the products again, recording for each offproc row both its global (i,j)
       (into coo_i/coo_j) and the position of the value inside mp[cp] (into off[cp]);
       for locally-owned rows only the value positions (own[cp]) are needed here */
    for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
      Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
      PetscInt   *idxoff = mmdata->off[cp];
      PetscInt   *idxown = mmdata->own[cp];
      if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
        const PetscInt *rmap = rmapa[cp];
        const PetscInt *cmap = cmapa[cp];
        const PetscInt *ii   = mm->i;
        PetscInt       *coi  = coo_i + ncoo_o;
        PetscInt       *coj  = coo_j + ncoo_o;
        const PetscInt  mr   = mp[cp]->rmap->n;
        const PetscInt  rs   = C->rmap->rstart;
        const PetscInt  re   = C->rmap->rend;
        const PetscInt  cs   = C->cmap->rstart;
        for (i = 0; i < mr; i++) {
          const PetscInt *jj = mm->j + ii[i];
          const PetscInt  gr = rmap[i];
          const PetscInt  nz = ii[i + 1] - ii[i];
          if (gr < rs || gr >= re) { /* this is an offproc row */
            for (j = ii[i]; j < ii[i + 1]; j++) {
              *coi++    = gr;
              *idxoff++ = j;
            }
            if (!cmapt[cp]) { /* already global */
              for (j = 0; j < nz; j++) *coj++ = jj[j];
            } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
              for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
            } else { /* offdiag */
              for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
            }
            ncoo_o += nz;
          } else { /* this is a local row */
            for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
          }
        }
      }
      /* close the CSR-like segments (off[cp+1]-off[cp] = entries mp[cp] sends away) */
      mmdata->off[cp + 1] = idxoff;
      mmdata->own[cp + 1] = idxown;
    }

    /* route each offproc (i,j) to its owner via a star forest built on C's row layout */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
    PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
    PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
    ncoo = ncoo_d + ncoo_oown + ncoo2;
    PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscFree2(coo_i, coo_j));
    /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
    PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
    coo_i = coo_i2;
    coo_j = coo_j2;
  } else { /* no offproc values insertion */
    ncoo = ncoo_d;
    PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));

    /* an empty sf still needs to exist: the numeric phase and the destroy routine use it unconditionally */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(mmdata->sf));
  }
  mmdata->hasoffproc = hasoffproc;

  /* gather (i,j) of nonzeros inserted locally */
  for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
    PetscInt       *coi  = coo_i + ncoo_d;
    PetscInt       *coj  = coo_j + ncoo_d;
    const PetscInt *jj   = mm->j;
    const PetscInt *ii   = mm->i;
    const PetscInt *cmap = cmapa[cp];
    const PetscInt *rmap = rmapa[cp];
    const PetscInt  mr   = mp[cp]->rmap->n;
    const PetscInt  rs   = C->rmap->rstart;
    const PetscInt  re   = C->rmap->rend;
    const PetscInt  cs   = C->cmap->rstart;

    if (mptmp[cp]) continue;
    if (rmapt[cp] == 1) { /* consecutive rows */
      /* fill coo_i */
      for (i = 0; i < mr; i++) {
        const PetscInt gr = i + rs;
        for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
      }
      /* fill coo_j */
      if (!cmapt[cp]) { /* type-0, already global */
        PetscCall(PetscArraycpy(coj, jj, mm->nz));
      } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
        for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
      } else {                                            /* type-2, local to global for sparse columns */
        for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
      }
      ncoo_d += mm->nz;
    } else if (rmapt[cp] == 2) { /* sparse rows */
      for (i = 0; i < mr; i++) {
        const PetscInt *jj = mm->j + ii[i];
        const PetscInt  gr = rmap[i];
        const PetscInt  nz = ii[i + 1] - ii[i];
        if (gr >= rs && gr < re) { /* local rows */
          for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
          if (!cmapt[cp]) { /* type-0, already global */
            for (j = 0; j < nz; j++) *coj++ = jj[j];
          } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
            for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
          } else { /* type-2, local to global for sparse columns */
            for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
          }
          ncoo_d += nz;
        }
      }
    }
  }
  if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
  PetscCall(ISDestroy(&glob));
  if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
  PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
  /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
  PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));

  /* preallocate with COO data */
  PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
  PetscCall(PetscFree2(coo_i, coo_j));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7619 
/*
  MatProductSetFromOptions_MPIAIJBACKEND - select the COO-based backend symbolic phase for a matrix product

  The backend symbolic routine is installed for AB, AtB and PtAP products. With device support
  (PETSC_HAVE_DEVICE) it is used only when A and B have the same type, neither is bound to the CPU,
  and the user has not requested the CPU path via the -*_backend_cpu options; otherwise we fall
  back to the standard MPIAIJ product dispatch at the bottom.

  Input Parameter:
. mat - the product matrix (mat->product must be set, checked by MatCheckProduct())
*/
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match  = PETSC_FALSE; /* A and B types match and neither is CPU-bound */
  PetscBool usecpu = PETSC_FALSE; /* user explicitly requested the CPU code path */
#else
  PetscBool match = PETSC_TRUE; /* no device support: backend symbolic is always applicable */
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat, 1);
#if defined(PETSC_HAVE_DEVICE)
  if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
  if (match) { /* we can always fallback to the CPU if requested */
    /* the option name depends on whether the user called MatMatMult()-style API (api_user) or MatProductCreate() */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu;
  }
#endif
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default: /* other product types are not handled by the backend */
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7690 
7691 /*
7692    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7693 
7694    n - the number of block indices in cc[]
7695    cc - the block indices (must be large enough to contain the indices)
7696 */
7697 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7698 {
7699   PetscInt        cnt = -1, nidx, j;
7700   const PetscInt *idx;
7701 
7702   PetscFunctionBegin;
7703   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7704   if (nidx) {
7705     cnt     = 0;
7706     cc[cnt] = idx[0] / bs;
7707     for (j = 1; j < nidx; j++) {
7708       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7709     }
7710   }
7711   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7712   *n = cnt + 1;
7713   PetscFunctionReturn(PETSC_SUCCESS);
7714 }
7715 
7716 /*
7717     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7718 
7719     ncollapsed - the number of block indices
7720     collapsed - the block indices (must be large enough to contain the indices)
7721 */
7722 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7723 {
7724   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7725 
7726   PetscFunctionBegin;
7727   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7728   for (i = start + 1; i < start + bs; i++) {
7729     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7730     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7731     cprevtmp = cprev;
7732     cprev    = merged;
7733     merged   = cprevtmp;
7734   }
7735   *ncollapsed = nprev;
7736   if (collapsed) *collapsed = cprev;
7737   PetscFunctionReturn(PETSC_SUCCESS);
7738 }
7739 
7740 /*
7741  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7742 
7743  Input Parameter:
7744  . Amat - matrix
7745  - symmetrize - make the result symmetric
7746  + scale - scale with diagonal
7747 
7748  Output Parameter:
7749  . a_Gmat - output scalar graph >= 0
7750 
7751 */
/* Build the scalar graph Gmat from the (possibly blocked, bs > 1) matrix Amat: every bs x bs
   block collapses to one scalar entry, the sum of absolute values of the block's entries
   (or of selected (index,index) entries when index_size > 0).  For bs == 1 the matrix is
   duplicated or referenced directly.  Optional post-processing: symmetrize, diagonal scale,
   and threshold filtering.  Only (Seq/MPI)AIJ matrices are supported. */
PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat)
{
  PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
  MPI_Comm  comm;
  Mat       Gmat;
  PetscBool ismpiaij, isseqaij;
  Mat       a, b, c; /* a = diagonal (or whole) block, b = off-diagonal block (MPI only), c = loop cursor */
  MatType   jtype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
  PetscCall(MatGetSize(Amat, &MM, &NN));
  PetscCall(MatGetBlockSize(Amat, &bs));
  nloc = (Iend - Istart) / bs; /* number of local block rows = rows of the scalar graph */

  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
  PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");

  /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
  /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
     implementation */
  if (bs > 1) {
    PetscCall(MatGetType(Amat, &jtype));
    PetscCall(MatCreate(comm, &Gmat));
    PetscCall(MatSetType(Gmat, jtype));
    PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatSetBlockSizes(Gmat, 1, 1));
    /* Fast path: requires fully dense bs x bs blocks; verified below, otherwise we jump to old_bs.
       NOTE(review): for MPIAIJ this also requires garray (i.e. an assembled off-diagonal part). */
    if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
      PetscInt  *d_nnz, *o_nnz;
      MatScalar *aa, val, *AA; /* AA/AJ: one scalar value/column index per block in the current block row */
      PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
      if (isseqaij) {
        a = Amat;
        b = NULL;
      } else {
        Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
        a             = d->A;
        b             = d->B;
      }
      PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
      PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
      /* count block nonzeros per block row and verify the dense-block assumption */
      for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
        PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
        const PetscInt *cols1, *cols2;
        for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
          PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
          nnz[brow / bs] = nc2 / bs;
          if (nc2 % bs) ok = 0; /* row length not a multiple of bs => blocks cannot all be dense */
          if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
          for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
            PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
            if (nc1 != nc2) ok = 0;
            else {
              for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
                if (cols1[jj] != cols2[jj]) ok = 0; /* rows of the block must share the same column pattern */
                if (cols1[jj] % bs != jj % bs) ok = 0;
              }
            }
            PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
          }
          PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
          if (!ok) {
            PetscCall(PetscFree2(d_nnz, o_nnz));
            PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
            goto old_bs;
          }
        }
      }
      PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
      PetscCall(PetscFree2(d_nnz, o_nnz));
      PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
      // diag
      for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
        Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
        ai               = aseq->i;
        n                = ai[brow + 1] - ai[brow];
        aj               = aseq->j + ai[brow];
        for (int k = 0; k < n; k += bs) {        // block columns
          AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
          val        = 0;
          if (index_size == 0) {
            for (int ii = 0; ii < bs; ii++) { // rows in block
              /* dense blocks: column offset k is the same in every row of the block */
              aa = aseq->a + ai[brow + ii] + k;
              for (int jj = 0; jj < bs; jj++) {         // columns in block
                val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
              }
            }
          } else {                                       // use (index,index) value if provided
            for (int iii = 0; iii < index_size; iii++) { // rows in block
              int ii = index[iii];
              aa     = aseq->a + ai[brow + ii] + k;
              for (int jjj = 0; jjj < index_size; jjj++) { // columns in block
                int jj = index[jjj];
                val += PetscAbs(PetscRealPart(aa[jj]));
              }
            }
          }
          PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
          AA[k / bs] = val;
        }
        grow = Istart / bs + brow / bs; /* global block-row index */
        PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES));
      }
      // off-diag
      if (ismpiaij) {
        Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
        const PetscScalar *vals;
        const PetscInt    *cols, *garray = aij->garray; /* garray maps B's local columns to global columns */
        PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
        for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
          /* first pass: set up global block-column indices and zero the accumulators */
          PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
          for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
            PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
            AA[k / bs] = 0;
            AJ[cidx]   = garray[cols[k]] / bs;
          }
          nc = ncols / bs;
          PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
          /* second pass: accumulate |value| sums block by block */
          if (index_size == 0) {
            for (int ii = 0; ii < bs; ii++) { // rows in block
              PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
              for (int k = 0; k < ncols; k += bs) {
                for (int jj = 0; jj < bs; jj++) { // cols in block
                  PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
                  AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
                }
              }
              PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
            }
          } else {                                       // use (index,index) value if provided
            for (int iii = 0; iii < index_size; iii++) { // rows in block
              int ii = index[iii];
              PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
              for (int k = 0; k < ncols; k += bs) {
                for (int jjj = 0; jjj < index_size; jjj++) { // cols in block
                  int jj = index[jjj];
                  AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
                }
              }
              PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
            }
          }
          grow = Istart / bs + brow / bs;
          PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES));
        }
      }
      PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(PetscFree2(AA, AJ));
    } else {
      /* slow path: one MatGetRow()/MatSetValues(ADD_VALUES) per scalar row; handles sparse blocks */
      const PetscScalar *vals;
      const PetscInt    *idx;
      PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
    old_bs:
      /*
       Determine the preallocation needed for the scalar matrix derived from the vector matrix.
       */
      PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
      PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
      if (isseqaij) {
        PetscInt max_d_nnz;
        /*
         Determine exact preallocation count for (sequential) scalar matrix
         */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
        max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
        PetscCall(PetscFree3(w0, w1, w2));
      } else if (ismpiaij) {
        Mat             Daij, Oaij;
        const PetscInt *garray;
        PetscInt        max_d_nnz;
        PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
        /*
         Determine exact preallocation count for diagonal block portion of scalar matrix
         */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
        max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
        PetscCall(PetscFree3(w0, w1, w2));
        /*
         Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
         */
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
          o_nnz[jj] = 0;
          for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
            PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
            o_nnz[jj] += ncols;
            PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
          }
          if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; /* clamp to number of off-process block columns */
        }
      } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
      /* get scalar copy (norms) of matrix */
      PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
      PetscCall(PetscFree2(d_nnz, o_nnz));
      for (Ii = Istart; Ii < Iend; Ii++) {
        PetscInt dest_row = Ii / bs;
        PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
        for (jj = 0; jj < ncols; jj++) {
          PetscInt    dest_col = idx[jj] / bs;
          PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
          PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
        }
        PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
      }
      PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
    }
  } else {
    /* bs == 1: the matrix is already scalar; duplicate it only if we must modify its values */
    if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
    else {
      Gmat = Amat;
      PetscCall(PetscObjectReference((PetscObject)Gmat));
    }
    if (isseqaij) {
      a = Gmat;
      b = NULL;
    } else {
      Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
      a             = d->A;
      b             = d->B;
    }
    if (filter >= 0 || scale) {
      /* take absolute value of each entry */
      for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
        MatInfo      info;
        PetscScalar *avals;
        PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
        PetscCall(MatSeqAIJGetArray(c, &avals));
        for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
        PetscCall(MatSeqAIJRestoreArray(c, &avals));
      }
    }
  }
  if (symmetrize) {
    PetscBool isset, issym;
    PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
    if (!isset || !issym) {
      /* G = G + G^T */
      Mat matTrans;
      PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
      PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
      PetscCall(MatDestroy(&matTrans));
    }
    PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
  } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
  if (scale) {
    /* scale c for all diagonal values = 1 or -1 */
    Vec diag;
    PetscCall(MatCreateVecs(Gmat, &diag, NULL));
    PetscCall(MatGetDiagonal(Gmat, diag));
    PetscCall(VecReciprocal(diag));
    PetscCall(VecSqrtAbs(diag));
    PetscCall(MatDiagonalScale(Gmat, diag, diag));
    PetscCall(VecDestroy(&diag));
  }
  PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));

  if (filter >= 0) {
    PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
    PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
  }
  *a_Gmat = Gmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
8023 
8024 /*
8025     Special version for direct calls from Fortran
8026 */
8027 #include <petsc/private/fortranimpl.h>
8028 
8029 /* Change these macros so can be used in void function */
8030 /* Identical to PetscCallVoid, except it assigns to *_ierr */
8031 #undef PetscCall
8032 #define PetscCall(...) \
8033   do { \
8034     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
8035     if (PetscUnlikely(ierr_msv_mpiaij)) { \
8036       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
8037       return; \
8038     } \
8039   } while (0)
8040 
8041 #undef SETERRQ
8042 #define SETERRQ(comm, ierr, ...) \
8043   do { \
8044     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
8045     return; \
8046   } while (0)
8047 
8048 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8049   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8050 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8051   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8052 #else
8053 #endif
/* Fortran stub for MatSetValues() on MPIAIJ matrices, called directly from Fortran.
   All arguments are passed by reference (Fortran convention); errors are reported
   through *_ierr via the redefined PetscCall/SETERRQ macros above.
   Owned rows are inserted into the local diagonal (A) or off-diagonal (B) block via the
   MatSetValues_SeqAIJ_{A,B}_Private macros, which read/write many of the locals declared
   below by name (rp1/ap1/low1/..., rp2/ap2/low2/...); off-process rows go to the stash. */
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat = *mmat;
  PetscInt    m = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat         A     = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    PetscBool   ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B                 = aij->B;
    Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue; /* negative row indices are ignored */
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* locally owned row: set up the binary-search state used by the insertion macros */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m]; /* column-oriented input */
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* column in the diagonal block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* column in the off-diagonal block */
            if (mat->was_assembled) {
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
                /* new off-diagonal column: disassemble so B uses global column indices again */
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B        = aij->B;
                b        = (Mat_SeqAIJ *)B->data;
                bimax    = b->imax;
                bi       = b->i;
                bilen    = b->ilen;
                bj       = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* off-process row: stash for communication at assembly time */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}
8167 
8168 /* Undefining these here since they were redefined from their original definition above! No
8169  * other PETSc functions should be defined past this point, as it is impossible to recover the
8170  * original definitions */
8171 #undef PetscCall
8172 #undef SETERRQ
8173