xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 4cc2b5b5fe4ffd09e5956b56d7cdc4f43e324103)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
10 {
11   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
12 
13   PetscFunctionBegin;
14   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
15   PetscCall(MatStashDestroy_Private(&mat->stash));
16   PetscCall(VecDestroy(&aij->diag));
17   PetscCall(MatDestroy(&aij->A));
18   PetscCall(MatDestroy(&aij->B));
19 #if defined(PETSC_USE_CTABLE)
20   PetscCall(PetscHMapIDestroy(&aij->colmap));
21 #else
22   PetscCall(PetscFree(aij->colmap));
23 #endif
24   PetscCall(PetscFree(aij->garray));
25   PetscCall(VecDestroy(&aij->lvec));
26   PetscCall(VecScatterDestroy(&aij->Mvctx));
27   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
28   PetscCall(PetscFree(aij->ld));
29 
30   PetscCall(PetscFree(mat->data));
31 
32   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
33   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
34 
35   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
36   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
37   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
38   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
39   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
40   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
41   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
42   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
43   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
44   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
45 #if defined(PETSC_HAVE_CUDA)
46   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
47 #endif
48 #if defined(PETSC_HAVE_HIP)
49   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
50 #endif
51 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
52   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
53 #endif
54   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
55 #if defined(PETSC_HAVE_ELEMENTAL)
56   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
57 #endif
58 #if defined(PETSC_HAVE_SCALAPACK)
59   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
60 #endif
61 #if defined(PETSC_HAVE_HYPRE)
62   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
63   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
64 #endif
65   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
66   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
69   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
71 #if defined(PETSC_HAVE_MKL_SPARSE)
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
73 #endif
74   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
75   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
76   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
77   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
78   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
79   PetscFunctionReturn(PETSC_SUCCESS);
80 }
81 
/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
83 #define TYPE AIJ
84 #define TYPE_AIJ
85 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
86 #undef TYPE
87 #undef TYPE_AIJ
88 
89 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
90 {
91   Mat B;
92 
93   PetscFunctionBegin;
94   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
95   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
96   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
97   PetscCall(MatDestroy(&B));
98   PetscFunctionReturn(PETSC_SUCCESS);
99 }
100 
101 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
102 {
103   Mat B;
104 
105   PetscFunctionBegin;
106   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
107   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
108   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
109   PetscFunctionReturn(PETSC_SUCCESS);
110 }
111 
112 /*MC
113    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
114 
   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
116    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
117   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
118   for communicators controlling multiple processes.  It is recommended that you call both of
119   the above preallocation routines for simplicity.
120 
121    Options Database Key:
122 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
123 
  Level: beginner

  Developer Note:
  Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`. The type also automatically
  switches over to use inodes when enough exist.
129 
.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
131 M*/
132 
133 /*MC
134    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
135 
136    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
137    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
138    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
139   for communicators controlling multiple processes.  It is recommended that you call both of
140   the above preallocation routines for simplicity.
141 
142    Options Database Key:
. -mat_type aijcrl - sets the matrix type to `MATAIJCRL` during a call to `MatSetFromOptions()`
144 
145   Level: beginner
146 
.seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
148 M*/
149 
150 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
151 {
152   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
153 
154   PetscFunctionBegin;
155 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
156   A->boundtocpu = flg;
157 #endif
158   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
159   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
160 
161   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
162    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
163    * to differ from the parent matrix. */
164   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
165   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
166   PetscFunctionReturn(PETSC_SUCCESS);
167 }
168 
169 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
170 {
171   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
172 
173   PetscFunctionBegin;
174   if (mat->A) {
175     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
176     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
177   }
178   PetscFunctionReturn(PETSC_SUCCESS);
179 }
180 
181 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
182 {
183   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
184   Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
185   Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
186   const PetscInt  *ia, *ib;
187   const MatScalar *aa, *bb, *aav, *bav;
188   PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
189   PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;
190 
191   PetscFunctionBegin;
192   *keptrows = NULL;
193 
194   ia = a->i;
195   ib = b->i;
196   PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
197   PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
198   for (i = 0; i < m; i++) {
199     na = ia[i + 1] - ia[i];
200     nb = ib[i + 1] - ib[i];
201     if (!na && !nb) {
202       cnt++;
203       goto ok1;
204     }
205     aa = aav + ia[i];
206     for (j = 0; j < na; j++) {
207       if (aa[j] != 0.0) goto ok1;
208     }
209     bb = PetscSafePointerPlusOffset(bav, ib[i]);
210     for (j = 0; j < nb; j++) {
211       if (bb[j] != 0.0) goto ok1;
212     }
213     cnt++;
214   ok1:;
215   }
216   PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
217   if (!n0rows) {
218     PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
219     PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
220     PetscFunctionReturn(PETSC_SUCCESS);
221   }
222   PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
223   cnt = 0;
224   for (i = 0; i < m; i++) {
225     na = ia[i + 1] - ia[i];
226     nb = ib[i + 1] - ib[i];
227     if (!na && !nb) continue;
228     aa = aav + ia[i];
229     for (j = 0; j < na; j++) {
230       if (aa[j] != 0.0) {
231         rows[cnt++] = rstart + i;
232         goto ok2;
233       }
234     }
235     bb = PetscSafePointerPlusOffset(bav, ib[i]);
236     for (j = 0; j < nb; j++) {
237       if (bb[j] != 0.0) {
238         rows[cnt++] = rstart + i;
239         goto ok2;
240       }
241     }
242   ok2:;
243   }
244   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
245   PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
246   PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
247   PetscFunctionReturn(PETSC_SUCCESS);
248 }
249 
250 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
251 {
252   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
253   PetscBool   cong;
254 
255   PetscFunctionBegin;
256   PetscCall(MatHasCongruentLayouts(Y, &cong));
257   if (Y->assembled && cong) {
258     PetscCall(MatDiagonalSet(aij->A, D, is));
259   } else {
260     PetscCall(MatDiagonalSet_Default(Y, D, is));
261   }
262   PetscFunctionReturn(PETSC_SUCCESS);
263 }
264 
265 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
266 {
267   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
268   PetscInt    i, rstart, nrows, *rows;
269 
270   PetscFunctionBegin;
271   *zrows = NULL;
272   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
273   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
274   for (i = 0; i < nrows; i++) rows[i] += rstart;
275   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
276   PetscFunctionReturn(PETSC_SUCCESS);
277 }
278 
279 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
280 {
281   Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
282   PetscInt           i, m, n, *garray = aij->garray;
283   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
284   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
285   PetscReal         *work;
286   const PetscScalar *dummy;
287 
288   PetscFunctionBegin;
289   PetscCall(MatGetSize(A, &m, &n));
290   PetscCall(PetscCalloc1(n, &work));
291   PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
292   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
293   PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
294   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
295   if (type == NORM_2) {
296     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
297     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
298   } else if (type == NORM_1) {
299     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
300     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
301   } else if (type == NORM_INFINITY) {
302     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
303     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
304   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
305     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
306     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
307   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
308     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
309     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
310   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
311   if (type == NORM_INFINITY) {
312     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
313   } else {
314     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
315   }
316   PetscCall(PetscFree(work));
317   if (type == NORM_2) {
318     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
319   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
320     for (i = 0; i < n; i++) reductions[i] /= m;
321   }
322   PetscFunctionReturn(PETSC_SUCCESS);
323 }
324 
325 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
326 {
327   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
328   IS              sis, gis;
329   const PetscInt *isis, *igis;
330   PetscInt        n, *iis, nsis, ngis, rstart, i;
331 
332   PetscFunctionBegin;
333   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
334   PetscCall(MatFindNonzeroRows(a->B, &gis));
335   PetscCall(ISGetSize(gis, &ngis));
336   PetscCall(ISGetSize(sis, &nsis));
337   PetscCall(ISGetIndices(sis, &isis));
338   PetscCall(ISGetIndices(gis, &igis));
339 
340   PetscCall(PetscMalloc1(ngis + nsis, &iis));
341   PetscCall(PetscArraycpy(iis, igis, ngis));
342   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
343   n = ngis + nsis;
344   PetscCall(PetscSortRemoveDupsInt(&n, iis));
345   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
346   for (i = 0; i < n; i++) iis[i] += rstart;
347   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
348 
349   PetscCall(ISRestoreIndices(sis, &isis));
350   PetscCall(ISRestoreIndices(gis, &igis));
351   PetscCall(ISDestroy(&sis));
352   PetscCall(ISDestroy(&gis));
353   PetscFunctionReturn(PETSC_SUCCESS);
354 }
355 
356 /*
357   Local utility routine that creates a mapping from the global column
358 number to the local number in the off-diagonal part of the local
359 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
360 a slightly higher hash table cost; without it it is not scalable (each processor
361 has an order N integer array but is fast to access.
362 */
363 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
364 {
365   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
366   PetscInt    n   = aij->B->cmap->n, i;
367 
368   PetscFunctionBegin;
369   PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
370 #if defined(PETSC_USE_CTABLE)
371   PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
372   for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
373 #else
374   PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
375   for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
376 #endif
377   PetscFunctionReturn(PETSC_SUCCESS);
378 }
379 
/*
  MatSetValues_SeqAIJ_A_Private - Inserts or adds a single value into one row of the matrix A.

  Arguments:
    row, col   - row and column of the entry, used to index ailen[] and compared against rp1[]
    value      - the value to insert or add
    addv       - ADD_VALUES adds to an existing entry, anything else overwrites it
    orow, ocol - row/column reported in the error message only

  The macro is not self-contained: it reads and updates variables of the enclosing function, namely
  rp1/ap1 (indexed as the column indices and values of the row), nrow1 (entries currently in the row),
  rmax1, low1/high1/lastcol1 (search window carried from one call to the next), the scratch variables
  t, _i and N, plus A, a, am, aa, ai, aj, aimax, ailen, nonew and ignorezeroentries.

  Steps: narrow [low1, high1) by bisection until it is at most 5 wide, scan it linearly, and update the
  entry in place if the column is found. Otherwise the value is dropped when it is an off-diagonal
  (row != col) zero and ignorezeroentries is set, or when nonew == 1; nonew == -1 raises an error.
  In the remaining cases MatSeqXAIJReallocateAIJ() is invoked, later entries of the row are shifted up by
  one and the new entry is stored at position _i. ailen[row] is written back on every path.
*/
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)
424 
/*
  MatSetValues_SeqAIJ_B_Private - Inserts or adds a single value into one row of the matrix B.

  Arguments:
    row, col   - row and column of the entry, used to index bilen[] and compared against rp2[]
    value      - the value to insert or add
    addv       - ADD_VALUES adds to an existing entry, anything else overwrites it
    orow, ocol - row/column reported in the error message only

  The macro is not self-contained: it reads and updates variables of the enclosing function, namely
  rp2/ap2 (indexed as the column indices and values of the row), nrow2 (entries currently in the row),
  rmax2, low2/high2/lastcol2 (search window carried from one call to the next), the scratch variables
  t, _i and N, plus B, b, bm, ba, bi, bj, bimax, bilen, nonew and ignorezeroentries.

  Steps: narrow [low2, high2) by bisection until it is at most 5 wide, scan it linearly, and update the
  entry in place if the column is found. Otherwise the value is dropped when it is zero and
  ignorezeroentries is set (no row != col exception here), or when nonew == 1; nonew == -1 raises an
  error. In the remaining cases MatSeqXAIJReallocateAIJ() is invoked, later entries of the row are shifted
  up by one and the new entry is stored at position _i. bilen[row] is written back on every path.
*/
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  } while (0)
468 
469 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
470 {
471   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
472   Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
473   PetscInt     l, *garray                         = mat->garray, diag;
474   PetscScalar *aa, *ba;
475 
476   PetscFunctionBegin;
477   /* code only works for square matrices A */
478 
479   /* find size of row to the left of the diagonal part */
480   PetscCall(MatGetOwnershipRange(A, &diag, NULL));
481   row = row - diag;
482   for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
483     if (garray[b->j[b->i[row] + l]] > diag) break;
484   }
485   if (l) {
486     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
487     PetscCall(PetscArraycpy(ba + b->i[row], v, l));
488     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
489   }
490 
491   /* diagonal part */
492   if (a->i[row + 1] - a->i[row]) {
493     PetscCall(MatSeqAIJGetArray(mat->A, &aa));
494     PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
495     PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
496   }
497 
498   /* right of diagonal part */
499   if (b->i[row + 1] - b->i[row] - l) {
500     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
501     PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
502     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
503   }
504   PetscFunctionReturn(PETSC_SUCCESS);
505 }
506 
/*
  MatSetValues_MPIAIJ - Inserts or adds an m by n block of values into an MPIAIJ matrix.

  Input Parameters:
+ mat  - the matrix
. m    - number of rows
. im   - global row indices; negative entries are skipped
. n    - number of columns
. in   - global column indices; negative entries are skipped
. v    - the values, read as v[i * n + j] when the matrix is row oriented and as v[i + j * m] otherwise;
         when v is NULL every value is taken as 0.0
- addv - ADD_VALUES or another insert mode, passed on to the per-entry macros

  Rows in [rstart, rend) are handled locally: columns in [cstart, cend) go to the diagonal block through
  MatSetValues_SeqAIJ_A_Private(), all other columns go to the off-diagonal block through
  MatSetValues_SeqAIJ_B_Private(). Rows outside that range are placed in the stash unless donotstash is
  set, and are an error when nooffprocentries is set.

  The long list of locals below exists because the two macros reference them by name.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A     = aij->A;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B                 = aij->B;
  Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the per-row state used by the A and B macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
      ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
      ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        /* zeros being added off the diagonal are dropped when zero entries are ignored */
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* diagonal block, column index relative to cstart */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            /* translate the global column through colmap, creating it on first use */
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));               /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = PetscSafePointerPlusOffset(bj, bi[row]);
              ap2   = PetscSafePointerPlusOffset(ba, bi[row]);
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              /* unknown column and B does not accept new nonzeros: report via PetscInfo when nonew is 1, otherwise raise an error */
              if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* never assembled: the global column index is used as is */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      /* row is not in the local ownership range */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, ba might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}
615 
616 /*
617     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
618     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
619     No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
620 */
621 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
622 {
623   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
624   Mat         A      = aij->A; /* diagonal part of the matrix */
625   Mat         B      = aij->B; /* off-diagonal part of the matrix */
626   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
627   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
628   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
629   PetscInt   *ailen = a->ilen, *aj = a->j;
630   PetscInt   *bilen = b->ilen, *bj = b->j;
631   PetscInt    am          = aij->A->rmap->n, j;
632   PetscInt    diag_so_far = 0, dnz;
633   PetscInt    offd_so_far = 0, onz;
634 
635   PetscFunctionBegin;
636   /* Iterate over all rows of the matrix */
637   for (j = 0; j < am; j++) {
638     dnz = onz = 0;
639     /*  Iterate over all non-zero columns of the current row */
640     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
641       /* If column is in the diagonal */
642       if (mat_j[col] >= cstart && mat_j[col] < cend) {
643         aj[diag_so_far++] = mat_j[col] - cstart;
644         dnz++;
645       } else { /* off-diagonal entries */
646         bj[offd_so_far++] = mat_j[col];
647         onz++;
648       }
649     }
650     ailen[j] = dnz;
651     bilen[j] = onz;
652   }
653   PetscFunctionReturn(PETSC_SUCCESS);
654 }
655 
656 /*
657     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
658     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
659     No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
660     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
661     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
662 */
663 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
664 {
665   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
666   Mat          A    = aij->A; /* diagonal part of the matrix */
667   Mat          B    = aij->B; /* off-diagonal part of the matrix */
668   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
669   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
670   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
671   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
672   PetscInt    *ailen = a->ilen, *aj = a->j;
673   PetscInt    *bilen = b->ilen, *bj = b->j;
674   PetscInt     am          = aij->A->rmap->n, j;
675   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
676   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
677   PetscScalar *aa = a->a, *ba = b->a;
678 
679   PetscFunctionBegin;
680   /* Iterate over all rows of the matrix */
681   for (j = 0; j < am; j++) {
682     dnz_row = onz_row = 0;
683     rowstart_offd     = full_offd_i[j];
684     rowstart_diag     = full_diag_i[j];
685     /*  Iterate over all non-zero columns of the current row */
686     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
687       /* If column is in the diagonal */
688       if (mat_j[col] >= cstart && mat_j[col] < cend) {
689         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
690         aa[rowstart_diag + dnz_row] = mat_a[col];
691         dnz_row++;
692       } else { /* off-diagonal entries */
693         bj[rowstart_offd + onz_row] = mat_j[col];
694         ba[rowstart_offd + onz_row] = mat_a[col];
695         onz_row++;
696       }
697     }
698     ailen[j] = dnz_row;
699     bilen[j] = onz_row;
700   }
701   PetscFunctionReturn(PETSC_SUCCESS);
702 }
703 
704 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
705 {
706   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
707   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
708   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
709 
710   PetscFunctionBegin;
711   for (i = 0; i < m; i++) {
712     if (idxm[i] < 0) continue; /* negative row */
713     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
714     PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
715     row = idxm[i] - rstart;
716     for (j = 0; j < n; j++) {
717       if (idxn[j] < 0) continue; /* negative column */
718       PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
719       if (idxn[j] >= cstart && idxn[j] < cend) {
720         col = idxn[j] - cstart;
721         PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
722       } else {
723         if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
724 #if defined(PETSC_USE_CTABLE)
725         PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
726         col--;
727 #else
728         col = aij->colmap[idxn[j]] - 1;
729 #endif
730         if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
731         else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
732       }
733     }
734   }
735   PetscFunctionReturn(PETSC_SUCCESS);
736 }
737 
738 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
739 {
740   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
741   PetscInt    nstash, reallocs;
742 
743   PetscFunctionBegin;
744   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
745 
746   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
747   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
748   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
749   PetscFunctionReturn(PETSC_SUCCESS);
750 }
751 
752 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
753 {
754   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
755   PetscMPIInt  n;
756   PetscInt     i, j, rstart, ncols, flg;
757   PetscInt    *row, *col;
758   PetscBool    other_disassembled;
759   PetscScalar *val;
760 
761   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
762 
763   PetscFunctionBegin;
764   if (!aij->donotstash && !mat->nooffprocentries) {
765     while (1) {
766       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
767       if (!flg) break;
768 
769       for (i = 0; i < n;) {
770         /* Now identify the consecutive vals belonging to the same row */
771         for (j = i, rstart = row[j]; j < n; j++) {
772           if (row[j] != rstart) break;
773         }
774         if (j < n) ncols = j - i;
775         else ncols = n - i;
776         /* Now assemble all these values with a single function call */
777         PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
778         i = j;
779       }
780     }
781     PetscCall(MatStashScatterEnd_Private(&mat->stash));
782   }
783 #if defined(PETSC_HAVE_DEVICE)
784   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
785   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
786   if (mat->boundtocpu) {
787     PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
788     PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
789   }
790 #endif
791   PetscCall(MatAssemblyBegin(aij->A, mode));
792   PetscCall(MatAssemblyEnd(aij->A, mode));
793 
794   /* determine if any processor has disassembled, if so we must
795      also disassemble ourself, in order that we may reassemble. */
796   /*
797      if nonzero structure of submatrix B cannot change then we know that
798      no processor disassembled thus we can skip this stuff
799   */
800   if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
801     PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
802     if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
803       PetscCall(MatDisAssemble_MPIAIJ(mat));
804     }
805   }
806   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
807   PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
808 #if defined(PETSC_HAVE_DEVICE)
809   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
810 #endif
811   PetscCall(MatAssemblyBegin(aij->B, mode));
812   PetscCall(MatAssemblyEnd(aij->B, mode));
813 
814   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
815 
816   aij->rowvalues = NULL;
817 
818   PetscCall(VecDestroy(&aij->diag));
819 
820   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
821   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
822     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
823     PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
824   }
825 #if defined(PETSC_HAVE_DEVICE)
826   mat->offloadmask = PETSC_OFFLOAD_BOTH;
827 #endif
828   PetscFunctionReturn(PETSC_SUCCESS);
829 }
830 
831 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
832 {
833   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
834 
835   PetscFunctionBegin;
836   PetscCall(MatZeroEntries(l->A));
837   PetscCall(MatZeroEntries(l->B));
838   PetscFunctionReturn(PETSC_SUCCESS);
839 }
840 
841 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
842 {
843   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
844   PetscInt   *lrows;
845   PetscInt    r, len;
846   PetscBool   cong;
847 
848   PetscFunctionBegin;
849   /* get locally owned rows */
850   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
851   PetscCall(MatHasCongruentLayouts(A, &cong));
852   /* fix right-hand side if needed */
853   if (x && b) {
854     const PetscScalar *xx;
855     PetscScalar       *bb;
856 
857     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
858     PetscCall(VecGetArrayRead(x, &xx));
859     PetscCall(VecGetArray(b, &bb));
860     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
861     PetscCall(VecRestoreArrayRead(x, &xx));
862     PetscCall(VecRestoreArray(b, &bb));
863   }
864 
865   if (diag != 0.0 && cong) {
866     PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
867     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
868   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
869     Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
870     Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
871     PetscInt    nnwA, nnwB;
872     PetscBool   nnzA, nnzB;
873 
874     nnwA = aijA->nonew;
875     nnwB = aijB->nonew;
876     nnzA = aijA->keepnonzeropattern;
877     nnzB = aijB->keepnonzeropattern;
878     if (!nnzA) {
879       PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
880       aijA->nonew = 0;
881     }
882     if (!nnzB) {
883       PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
884       aijB->nonew = 0;
885     }
886     /* Must zero here before the next loop */
887     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
888     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
889     for (r = 0; r < len; ++r) {
890       const PetscInt row = lrows[r] + A->rmap->rstart;
891       if (row >= A->cmap->N) continue;
892       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
893     }
894     aijA->nonew = nnwA;
895     aijB->nonew = nnwB;
896   } else {
897     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
898     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
899   }
900   PetscCall(PetscFree(lrows));
901   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
902   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
903 
904   /* only change matrix nonzero state if pattern was allowed to be changed */
905   if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) {
906     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
907     PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
908   }
909   PetscFunctionReturn(PETSC_SUCCESS);
910 }
911 
912 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
913 {
914   Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
915   PetscMPIInt        n = A->rmap->n;
916   PetscInt           i, j, r, m, len = 0;
917   PetscInt          *lrows, *owners = A->rmap->range;
918   PetscMPIInt        p = 0;
919   PetscSFNode       *rrows;
920   PetscSF            sf;
921   const PetscScalar *xx;
922   PetscScalar       *bb, *mask, *aij_a;
923   Vec                xmask, lmask;
924   Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
925   const PetscInt    *aj, *ii, *ridx;
926   PetscScalar       *aa;
927 
928   PetscFunctionBegin;
929   /* Create SF where leaves are input rows and roots are owned rows */
930   PetscCall(PetscMalloc1(n, &lrows));
931   for (r = 0; r < n; ++r) lrows[r] = -1;
932   PetscCall(PetscMalloc1(N, &rrows));
933   for (r = 0; r < N; ++r) {
934     const PetscInt idx = rows[r];
935     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
936     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
937       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
938     }
939     rrows[r].rank  = p;
940     rrows[r].index = rows[r] - owners[p];
941   }
942   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
943   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
944   /* Collect flags for rows to be zeroed */
945   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
946   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
947   PetscCall(PetscSFDestroy(&sf));
948   /* Compress and put in row numbers */
949   for (r = 0; r < n; ++r)
950     if (lrows[r] >= 0) lrows[len++] = r;
951   /* zero diagonal part of matrix */
952   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
953   /* handle off-diagonal part of matrix */
954   PetscCall(MatCreateVecs(A, &xmask, NULL));
955   PetscCall(VecDuplicate(l->lvec, &lmask));
956   PetscCall(VecGetArray(xmask, &bb));
957   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
958   PetscCall(VecRestoreArray(xmask, &bb));
959   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
960   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
961   PetscCall(VecDestroy(&xmask));
962   if (x && b) { /* this code is buggy when the row and column layout don't match */
963     PetscBool cong;
964 
965     PetscCall(MatHasCongruentLayouts(A, &cong));
966     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
967     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
968     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
969     PetscCall(VecGetArrayRead(l->lvec, &xx));
970     PetscCall(VecGetArray(b, &bb));
971   }
972   PetscCall(VecGetArray(lmask, &mask));
973   /* remove zeroed rows of off-diagonal matrix */
974   PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
975   ii = aij->i;
976   for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
977   /* loop over all elements of off process part of matrix zeroing removed columns*/
978   if (aij->compressedrow.use) {
979     m    = aij->compressedrow.nrows;
980     ii   = aij->compressedrow.i;
981     ridx = aij->compressedrow.rindex;
982     for (i = 0; i < m; i++) {
983       n  = ii[i + 1] - ii[i];
984       aj = aij->j + ii[i];
985       aa = aij_a + ii[i];
986 
987       for (j = 0; j < n; j++) {
988         if (PetscAbsScalar(mask[*aj])) {
989           if (b) bb[*ridx] -= *aa * xx[*aj];
990           *aa = 0.0;
991         }
992         aa++;
993         aj++;
994       }
995       ridx++;
996     }
997   } else { /* do not use compressed row format */
998     m = l->B->rmap->n;
999     for (i = 0; i < m; i++) {
1000       n  = ii[i + 1] - ii[i];
1001       aj = aij->j + ii[i];
1002       aa = aij_a + ii[i];
1003       for (j = 0; j < n; j++) {
1004         if (PetscAbsScalar(mask[*aj])) {
1005           if (b) bb[i] -= *aa * xx[*aj];
1006           *aa = 0.0;
1007         }
1008         aa++;
1009         aj++;
1010       }
1011     }
1012   }
1013   if (x && b) {
1014     PetscCall(VecRestoreArray(b, &bb));
1015     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1016   }
1017   PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
1018   PetscCall(VecRestoreArray(lmask, &mask));
1019   PetscCall(VecDestroy(&lmask));
1020   PetscCall(PetscFree(lrows));
1021 
1022   /* only change matrix nonzero state if pattern was allowed to be changed */
1023   if (!((Mat_SeqAIJ *)l->A->data)->nonew) {
1024     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1025     PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1026   }
1027   PetscFunctionReturn(PETSC_SUCCESS);
1028 }
1029 
1030 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1031 {
1032   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1033   PetscInt    nt;
1034   VecScatter  Mvctx = a->Mvctx;
1035 
1036   PetscFunctionBegin;
1037   PetscCall(VecGetLocalSize(xx, &nt));
1038   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
1039   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1040   PetscUseTypeMethod(a->A, mult, xx, yy);
1041   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1042   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1043   PetscFunctionReturn(PETSC_SUCCESS);
1044 }
1045 
1046 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1047 {
1048   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1049 
1050   PetscFunctionBegin;
1051   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1052   PetscFunctionReturn(PETSC_SUCCESS);
1053 }
1054 
1055 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1056 {
1057   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1058   VecScatter  Mvctx = a->Mvctx;
1059 
1060   PetscFunctionBegin;
1061   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1062   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1063   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1064   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1065   PetscFunctionReturn(PETSC_SUCCESS);
1066 }
1067 
1068 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1069 {
1070   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1071 
1072   PetscFunctionBegin;
1073   /* do nondiagonal part */
1074   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1075   /* do local part */
1076   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1077   /* add partial results together */
1078   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1079   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1080   PetscFunctionReturn(PETSC_SUCCESS);
1081 }
1082 
1083 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
1084 {
1085   MPI_Comm    comm;
1086   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
1087   Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
1088   IS          Me, Notme;
1089   PetscInt    M, N, first, last, *notme, i;
1090   PetscBool   lf;
1091   PetscMPIInt size;
1092 
1093   PetscFunctionBegin;
1094   /* Easy test: symmetric diagonal block */
1095   PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
1096   PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
1097   if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
1098   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
1099   PetscCallMPI(MPI_Comm_size(comm, &size));
1100   if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);
1101 
1102   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1103   PetscCall(MatGetSize(Amat, &M, &N));
1104   PetscCall(MatGetOwnershipRange(Amat, &first, &last));
1105   PetscCall(PetscMalloc1(N - last + first, &notme));
1106   for (i = 0; i < first; i++) notme[i] = i;
1107   for (i = last; i < M; i++) notme[i - last + first] = i;
1108   PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
1109   PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
1110   PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
1111   Aoff = Aoffs[0];
1112   PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
1113   Boff = Boffs[0];
1114   PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
1115   PetscCall(MatDestroyMatrices(1, &Aoffs));
1116   PetscCall(MatDestroyMatrices(1, &Boffs));
1117   PetscCall(ISDestroy(&Me));
1118   PetscCall(ISDestroy(&Notme));
1119   PetscCall(PetscFree(notme));
1120   PetscFunctionReturn(PETSC_SUCCESS);
1121 }
1122 
1123 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1124 {
1125   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1126 
1127   PetscFunctionBegin;
1128   /* do nondiagonal part */
1129   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1130   /* do local part */
1131   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1132   /* add partial results together */
1133   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1134   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1135   PetscFunctionReturn(PETSC_SUCCESS);
1136 }
1137 
1138 /*
1139   This only works correctly for square matrices where the subblock A->A is the
1140    diagonal block
1141 */
1142 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1143 {
1144   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1145 
1146   PetscFunctionBegin;
1147   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1148   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1149   PetscCall(MatGetDiagonal(a->A, v));
1150   PetscFunctionReturn(PETSC_SUCCESS);
1151 }
1152 
1153 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1154 {
1155   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1156 
1157   PetscFunctionBegin;
1158   PetscCall(MatScale(a->A, aa));
1159   PetscCall(MatScale(a->B, aa));
1160   PetscFunctionReturn(PETSC_SUCCESS);
1161 }
1162 
1163 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1164 {
1165   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1166   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1167   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1168   const PetscInt    *garray = aij->garray;
1169   const PetscScalar *aa, *ba;
1170   PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
1171   PetscInt64         nz, hnz;
1172   PetscInt          *rowlens;
1173   PetscInt          *colidxs;
1174   PetscScalar       *matvals;
1175   PetscMPIInt        rank;
1176 
1177   PetscFunctionBegin;
1178   PetscCall(PetscViewerSetUp(viewer));
1179 
1180   M  = mat->rmap->N;
1181   N  = mat->cmap->N;
1182   m  = mat->rmap->n;
1183   rs = mat->rmap->rstart;
1184   cs = mat->cmap->rstart;
1185   nz = A->nz + B->nz;
1186 
1187   /* write matrix header */
1188   header[0] = MAT_FILE_CLASSID;
1189   header[1] = M;
1190   header[2] = N;
1191   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1192   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1193   if (rank == 0) {
1194     if (hnz > PETSC_MAX_INT) header[3] = PETSC_MAX_INT;
1195     else header[3] = (PetscInt)hnz;
1196   }
1197   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1198 
1199   /* fill in and store row lengths  */
1200   PetscCall(PetscMalloc1(m, &rowlens));
1201   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1202   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1203   PetscCall(PetscFree(rowlens));
1204 
1205   /* fill in and store column indices */
1206   PetscCall(PetscMalloc1(nz, &colidxs));
1207   for (cnt = 0, i = 0; i < m; i++) {
1208     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1209       if (garray[B->j[jb]] > cs) break;
1210       colidxs[cnt++] = garray[B->j[jb]];
1211     }
1212     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1213     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1214   }
1215   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1216   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1217   PetscCall(PetscFree(colidxs));
1218 
1219   /* fill in and store nonzero values */
1220   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1221   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1222   PetscCall(PetscMalloc1(nz, &matvals));
1223   for (cnt = 0, i = 0; i < m; i++) {
1224     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1225       if (garray[B->j[jb]] > cs) break;
1226       matvals[cnt++] = ba[jb];
1227     }
1228     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1229     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1230   }
1231   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1232   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1233   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1234   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1235   PetscCall(PetscFree(matvals));
1236 
1237   /* write block size option to the viewer's .info file */
1238   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1239   PetscFunctionReturn(PETSC_SUCCESS);
1240 }
1241 
1242 #include <petscdraw.h>
1243 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1244 {
1245   Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
1246   PetscMPIInt       rank = aij->rank, size = aij->size;
1247   PetscBool         isdraw, iascii, isbinary;
1248   PetscViewer       sviewer;
1249   PetscViewerFormat format;
1250 
1251   PetscFunctionBegin;
1252   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1253   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1254   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1255   if (iascii) {
1256     PetscCall(PetscViewerGetFormat(viewer, &format));
1257     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1258       PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz;
1259       PetscCall(PetscMalloc1(size, &nz));
1260       PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
1261       for (i = 0; i < (PetscInt)size; i++) {
1262         nmax = PetscMax(nmax, nz[i]);
1263         nmin = PetscMin(nmin, nz[i]);
1264         navg += nz[i];
1265       }
1266       PetscCall(PetscFree(nz));
1267       navg = navg / size;
1268       PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
1269       PetscFunctionReturn(PETSC_SUCCESS);
1270     }
1271     PetscCall(PetscViewerGetFormat(viewer, &format));
1272     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1273       MatInfo   info;
1274       PetscInt *inodes = NULL;
1275 
1276       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1277       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
1278       PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
1279       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1280       if (!inodes) {
1281         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1282                                                      (double)info.memory));
1283       } else {
1284         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1285                                                      (double)info.memory));
1286       }
1287       PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
1288       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1289       PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
1290       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1291       PetscCall(PetscViewerFlush(viewer));
1292       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1293       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
1294       PetscCall(VecScatterView(aij->Mvctx, viewer));
1295       PetscFunctionReturn(PETSC_SUCCESS);
1296     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1297       PetscInt inodecount, inodelimit, *inodes;
1298       PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
1299       if (inodes) {
1300         PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
1301       } else {
1302         PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
1303       }
1304       PetscFunctionReturn(PETSC_SUCCESS);
1305     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1306       PetscFunctionReturn(PETSC_SUCCESS);
1307     }
1308   } else if (isbinary) {
1309     if (size == 1) {
1310       PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1311       PetscCall(MatView(aij->A, viewer));
1312     } else {
1313       PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
1314     }
1315     PetscFunctionReturn(PETSC_SUCCESS);
1316   } else if (iascii && size == 1) {
1317     PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1318     PetscCall(MatView(aij->A, viewer));
1319     PetscFunctionReturn(PETSC_SUCCESS);
1320   } else if (isdraw) {
1321     PetscDraw draw;
1322     PetscBool isnull;
1323     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1324     PetscCall(PetscDrawIsNull(draw, &isnull));
1325     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1326   }
1327 
1328   { /* assemble the entire matrix onto first processor */
1329     Mat A = NULL, Av;
1330     IS  isrow, iscol;
1331 
1332     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
1333     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
1334     PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
1335     PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
1336     /*  The commented code uses MatCreateSubMatrices instead */
1337     /*
1338     Mat *AA, A = NULL, Av;
1339     IS  isrow,iscol;
1340 
1341     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
1342     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
1343     PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
1344     if (rank == 0) {
1345        PetscCall(PetscObjectReference((PetscObject)AA[0]));
1346        A    = AA[0];
1347        Av   = AA[0];
1348     }
1349     PetscCall(MatDestroySubMatrices(1,&AA));
1350 */
1351     PetscCall(ISDestroy(&iscol));
1352     PetscCall(ISDestroy(&isrow));
1353     /*
1354        Everyone has to call to draw the matrix since the graphics waits are
1355        synchronized across all processors that share the PetscDraw object
1356     */
1357     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1358     if (rank == 0) {
1359       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
1360       PetscCall(MatView_SeqAIJ(Av, sviewer));
1361     }
1362     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1363     PetscCall(MatDestroy(&A));
1364   }
1365   PetscFunctionReturn(PETSC_SUCCESS);
1366 }
1367 
1368 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1369 {
1370   PetscBool iascii, isdraw, issocket, isbinary;
1371 
1372   PetscFunctionBegin;
1373   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1374   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1375   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1376   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1377   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1378   PetscFunctionReturn(PETSC_SUCCESS);
1379 }
1380 
1381 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
1382 {
1383   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
1384   Vec         bb1 = NULL;
1385   PetscBool   hasop;
1386 
1387   PetscFunctionBegin;
1388   if (flag == SOR_APPLY_UPPER) {
1389     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1390     PetscFunctionReturn(PETSC_SUCCESS);
1391   }
1392 
1393   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));
1394 
1395   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1396     if (flag & SOR_ZERO_INITIAL_GUESS) {
1397       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1398       its--;
1399     }
1400 
1401     while (its--) {
1402       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1403       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1404 
1405       /* update rhs: bb1 = bb - B*x */
1406       PetscCall(VecScale(mat->lvec, -1.0));
1407       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1408 
1409       /* local sweep */
1410       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
1411     }
1412   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1413     if (flag & SOR_ZERO_INITIAL_GUESS) {
1414       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1415       its--;
1416     }
1417     while (its--) {
1418       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1419       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1420 
1421       /* update rhs: bb1 = bb - B*x */
1422       PetscCall(VecScale(mat->lvec, -1.0));
1423       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1424 
1425       /* local sweep */
1426       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
1427     }
1428   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1429     if (flag & SOR_ZERO_INITIAL_GUESS) {
1430       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1431       its--;
1432     }
1433     while (its--) {
1434       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1435       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1436 
1437       /* update rhs: bb1 = bb - B*x */
1438       PetscCall(VecScale(mat->lvec, -1.0));
1439       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1440 
1441       /* local sweep */
1442       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
1443     }
1444   } else if (flag & SOR_EISENSTAT) {
1445     Vec xx1;
1446 
1447     PetscCall(VecDuplicate(bb, &xx1));
1448     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
1449 
1450     PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1451     PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1452     if (!mat->diag) {
1453       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
1454       PetscCall(MatGetDiagonal(matin, mat->diag));
1455     }
1456     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
1457     if (hasop) {
1458       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
1459     } else {
1460       PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
1461     }
1462     PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));
1463 
1464     PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));
1465 
1466     /* local sweep */
1467     PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
1468     PetscCall(VecAXPY(xx, 1.0, xx1));
1469     PetscCall(VecDestroy(&xx1));
1470   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");
1471 
1472   PetscCall(VecDestroy(&bb1));
1473 
1474   matin->factorerrortype = mat->A->factorerrortype;
1475   PetscFunctionReturn(PETSC_SUCCESS);
1476 }
1477 
1478 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
1479 {
1480   Mat             aA, aB, Aperm;
1481   const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
1482   PetscScalar    *aa, *ba;
1483   PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
1484   PetscSF         rowsf, sf;
1485   IS              parcolp = NULL;
1486   PetscBool       done;
1487 
1488   PetscFunctionBegin;
1489   PetscCall(MatGetLocalSize(A, &m, &n));
1490   PetscCall(ISGetIndices(rowp, &rwant));
1491   PetscCall(ISGetIndices(colp, &cwant));
1492   PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));
1493 
1494   /* Invert row permutation to find out where my rows should go */
1495   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
1496   PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
1497   PetscCall(PetscSFSetFromOptions(rowsf));
1498   for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
1499   PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1500   PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1501 
1502   /* Invert column permutation to find out where my columns should go */
1503   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1504   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
1505   PetscCall(PetscSFSetFromOptions(sf));
1506   for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
1507   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1508   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1509   PetscCall(PetscSFDestroy(&sf));
1510 
1511   PetscCall(ISRestoreIndices(rowp, &rwant));
1512   PetscCall(ISRestoreIndices(colp, &cwant));
1513   PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));
1514 
1515   /* Find out where my gcols should go */
1516   PetscCall(MatGetSize(aB, NULL, &ng));
1517   PetscCall(PetscMalloc1(ng, &gcdest));
1518   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1519   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
1520   PetscCall(PetscSFSetFromOptions(sf));
1521   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1522   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1523   PetscCall(PetscSFDestroy(&sf));
1524 
1525   PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
1526   PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1527   PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1528   for (i = 0; i < m; i++) {
1529     PetscInt    row = rdest[i];
1530     PetscMPIInt rowner;
1531     PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
1532     for (j = ai[i]; j < ai[i + 1]; j++) {
1533       PetscInt    col = cdest[aj[j]];
1534       PetscMPIInt cowner;
1535       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
1536       if (rowner == cowner) dnnz[i]++;
1537       else onnz[i]++;
1538     }
1539     for (j = bi[i]; j < bi[i + 1]; j++) {
1540       PetscInt    col = gcdest[bj[j]];
1541       PetscMPIInt cowner;
1542       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
1543       if (rowner == cowner) dnnz[i]++;
1544       else onnz[i]++;
1545     }
1546   }
1547   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1548   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1549   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1550   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1551   PetscCall(PetscSFDestroy(&rowsf));
1552 
1553   PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
1554   PetscCall(MatSeqAIJGetArray(aA, &aa));
1555   PetscCall(MatSeqAIJGetArray(aB, &ba));
1556   for (i = 0; i < m; i++) {
1557     PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
1558     PetscInt  j0, rowlen;
1559     rowlen = ai[i + 1] - ai[i];
1560     for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
1561       for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
1562       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
1563     }
1564     rowlen = bi[i + 1] - bi[i];
1565     for (j0 = j = 0; j < rowlen; j0 = j) {
1566       for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
1567       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
1568     }
1569   }
1570   PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
1571   PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
1572   PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1573   PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1574   PetscCall(MatSeqAIJRestoreArray(aA, &aa));
1575   PetscCall(MatSeqAIJRestoreArray(aB, &ba));
1576   PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
1577   PetscCall(PetscFree3(work, rdest, cdest));
1578   PetscCall(PetscFree(gcdest));
1579   if (parcolp) PetscCall(ISDestroy(&colp));
1580   *B = Aperm;
1581   PetscFunctionReturn(PETSC_SUCCESS);
1582 }
1583 
1584 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1585 {
1586   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1587 
1588   PetscFunctionBegin;
1589   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1590   if (ghosts) *ghosts = aij->garray;
1591   PetscFunctionReturn(PETSC_SUCCESS);
1592 }
1593 
1594 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1595 {
1596   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1597   Mat            A = mat->A, B = mat->B;
1598   PetscLogDouble isend[5], irecv[5];
1599 
1600   PetscFunctionBegin;
1601   info->block_size = 1.0;
1602   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1603 
1604   isend[0] = info->nz_used;
1605   isend[1] = info->nz_allocated;
1606   isend[2] = info->nz_unneeded;
1607   isend[3] = info->memory;
1608   isend[4] = info->mallocs;
1609 
1610   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1611 
1612   isend[0] += info->nz_used;
1613   isend[1] += info->nz_allocated;
1614   isend[2] += info->nz_unneeded;
1615   isend[3] += info->memory;
1616   isend[4] += info->mallocs;
1617   if (flag == MAT_LOCAL) {
1618     info->nz_used      = isend[0];
1619     info->nz_allocated = isend[1];
1620     info->nz_unneeded  = isend[2];
1621     info->memory       = isend[3];
1622     info->mallocs      = isend[4];
1623   } else if (flag == MAT_GLOBAL_MAX) {
1624     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1625 
1626     info->nz_used      = irecv[0];
1627     info->nz_allocated = irecv[1];
1628     info->nz_unneeded  = irecv[2];
1629     info->memory       = irecv[3];
1630     info->mallocs      = irecv[4];
1631   } else if (flag == MAT_GLOBAL_SUM) {
1632     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1633 
1634     info->nz_used      = irecv[0];
1635     info->nz_allocated = irecv[1];
1636     info->nz_unneeded  = irecv[2];
1637     info->memory       = irecv[3];
1638     info->mallocs      = irecv[4];
1639   }
1640   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1641   info->fill_ratio_needed = 0;
1642   info->factor_mallocs    = 0;
1643   PetscFunctionReturn(PETSC_SUCCESS);
1644 }
1645 
1646 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
1647 {
1648   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1649 
1650   PetscFunctionBegin;
1651   switch (op) {
1652   case MAT_NEW_NONZERO_LOCATIONS:
1653   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1654   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1655   case MAT_KEEP_NONZERO_PATTERN:
1656   case MAT_NEW_NONZERO_LOCATION_ERR:
1657   case MAT_USE_INODES:
1658   case MAT_IGNORE_ZERO_ENTRIES:
1659   case MAT_FORM_EXPLICIT_TRANSPOSE:
1660     MatCheckPreallocated(A, 1);
1661     PetscCall(MatSetOption(a->A, op, flg));
1662     PetscCall(MatSetOption(a->B, op, flg));
1663     break;
1664   case MAT_ROW_ORIENTED:
1665     MatCheckPreallocated(A, 1);
1666     a->roworiented = flg;
1667 
1668     PetscCall(MatSetOption(a->A, op, flg));
1669     PetscCall(MatSetOption(a->B, op, flg));
1670     break;
1671   case MAT_FORCE_DIAGONAL_ENTRIES:
1672   case MAT_SORTED_FULL:
1673     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1674     break;
1675   case MAT_IGNORE_OFF_PROC_ENTRIES:
1676     a->donotstash = flg;
1677     break;
1678   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1679   case MAT_SPD:
1680   case MAT_SYMMETRIC:
1681   case MAT_STRUCTURALLY_SYMMETRIC:
1682   case MAT_HERMITIAN:
1683   case MAT_SYMMETRY_ETERNAL:
1684   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1685   case MAT_SPD_ETERNAL:
1686     /* if the diagonal matrix is square it inherits some of the properties above */
1687     break;
1688   case MAT_SUBMAT_SINGLEIS:
1689     A->submat_singleis = flg;
1690     break;
1691   case MAT_STRUCTURE_ONLY:
1692     /* The option is handled directly by MatSetOption() */
1693     break;
1694   default:
1695     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1696   }
1697   PetscFunctionReturn(PETSC_SUCCESS);
1698 }
1699 
1700 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1701 {
1702   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
1703   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1704   PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
1705   PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
1706   PetscInt    *cmap, *idx_p;
1707 
1708   PetscFunctionBegin;
1709   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1710   mat->getrowactive = PETSC_TRUE;
1711 
1712   if (!mat->rowvalues && (idx || v)) {
1713     /*
1714         allocate enough space to hold information from the longest row.
1715     */
1716     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
1717     PetscInt    max = 1, tmp;
1718     for (i = 0; i < matin->rmap->n; i++) {
1719       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1720       if (max < tmp) max = tmp;
1721     }
1722     PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
1723   }
1724 
1725   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1726   lrow = row - rstart;
1727 
1728   pvA = &vworkA;
1729   pcA = &cworkA;
1730   pvB = &vworkB;
1731   pcB = &cworkB;
1732   if (!v) {
1733     pvA = NULL;
1734     pvB = NULL;
1735   }
1736   if (!idx) {
1737     pcA = NULL;
1738     if (!v) pcB = NULL;
1739   }
1740   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1741   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1742   nztot = nzA + nzB;
1743 
1744   cmap = mat->garray;
1745   if (v || idx) {
1746     if (nztot) {
1747       /* Sort by increasing column numbers, assuming A and B already sorted */
1748       PetscInt imark = -1;
1749       if (v) {
1750         *v = v_p = mat->rowvalues;
1751         for (i = 0; i < nzB; i++) {
1752           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1753           else break;
1754         }
1755         imark = i;
1756         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1757         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1758       }
1759       if (idx) {
1760         *idx = idx_p = mat->rowindices;
1761         if (imark > -1) {
1762           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
1763         } else {
1764           for (i = 0; i < nzB; i++) {
1765             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1766             else break;
1767           }
1768           imark = i;
1769         }
1770         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
1771         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
1772       }
1773     } else {
1774       if (idx) *idx = NULL;
1775       if (v) *v = NULL;
1776     }
1777   }
1778   *nz = nztot;
1779   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1780   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1781   PetscFunctionReturn(PETSC_SUCCESS);
1782 }
1783 
1784 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1785 {
1786   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1787 
1788   PetscFunctionBegin;
1789   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1790   aij->getrowactive = PETSC_FALSE;
1791   PetscFunctionReturn(PETSC_SUCCESS);
1792 }
1793 
1794 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
1795 {
1796   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
1797   Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
1798   PetscInt         i, j, cstart = mat->cmap->rstart;
1799   PetscReal        sum = 0.0;
1800   const MatScalar *v, *amata, *bmata;
1801 
1802   PetscFunctionBegin;
1803   if (aij->size == 1) {
1804     PetscCall(MatNorm(aij->A, type, norm));
1805   } else {
1806     PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
1807     PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
1808     if (type == NORM_FROBENIUS) {
1809       v = amata;
1810       for (i = 0; i < amat->nz; i++) {
1811         sum += PetscRealPart(PetscConj(*v) * (*v));
1812         v++;
1813       }
1814       v = bmata;
1815       for (i = 0; i < bmat->nz; i++) {
1816         sum += PetscRealPart(PetscConj(*v) * (*v));
1817         v++;
1818       }
1819       PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1820       *norm = PetscSqrtReal(*norm);
1821       PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
1822     } else if (type == NORM_1) { /* max column norm */
1823       PetscReal *tmp, *tmp2;
1824       PetscInt  *jj, *garray = aij->garray;
1825       PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
1826       PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
1827       *norm = 0.0;
1828       v     = amata;
1829       jj    = amat->j;
1830       for (j = 0; j < amat->nz; j++) {
1831         tmp[cstart + *jj++] += PetscAbsScalar(*v);
1832         v++;
1833       }
1834       v  = bmata;
1835       jj = bmat->j;
1836       for (j = 0; j < bmat->nz; j++) {
1837         tmp[garray[*jj++]] += PetscAbsScalar(*v);
1838         v++;
1839       }
1840       PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1841       for (j = 0; j < mat->cmap->N; j++) {
1842         if (tmp2[j] > *norm) *norm = tmp2[j];
1843       }
1844       PetscCall(PetscFree(tmp));
1845       PetscCall(PetscFree(tmp2));
1846       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1847     } else if (type == NORM_INFINITY) { /* max row norm */
1848       PetscReal ntemp = 0.0;
1849       for (j = 0; j < aij->A->rmap->n; j++) {
1850         v   = PetscSafePointerPlusOffset(amata, amat->i[j]);
1851         sum = 0.0;
1852         for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
1853           sum += PetscAbsScalar(*v);
1854           v++;
1855         }
1856         v = PetscSafePointerPlusOffset(bmata, bmat->i[j]);
1857         for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
1858           sum += PetscAbsScalar(*v);
1859           v++;
1860         }
1861         if (sum > ntemp) ntemp = sum;
1862       }
1863       PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
1864       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1865     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
1866     PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
1867     PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
1868   }
1869   PetscFunctionReturn(PETSC_SUCCESS);
1870 }
1871 
1872 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
1873 {
1874   Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
1875   Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
1876   PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
1877   const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
1878   Mat              B, A_diag, *B_diag;
1879   const MatScalar *pbv, *bv;
1880 
1881   PetscFunctionBegin;
1882   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1883   ma = A->rmap->n;
1884   na = A->cmap->n;
1885   mb = a->B->rmap->n;
1886   nb = a->B->cmap->n;
1887   ai = Aloc->i;
1888   aj = Aloc->j;
1889   bi = Bloc->i;
1890   bj = Bloc->j;
1891   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1892     PetscInt            *d_nnz, *g_nnz, *o_nnz;
1893     PetscSFNode         *oloc;
1894     PETSC_UNUSED PetscSF sf;
1895 
1896     PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
1897     /* compute d_nnz for preallocation */
1898     PetscCall(PetscArrayzero(d_nnz, na));
1899     for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
1900     /* compute local off-diagonal contributions */
1901     PetscCall(PetscArrayzero(g_nnz, nb));
1902     for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
1903     /* map those to global */
1904     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1905     PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
1906     PetscCall(PetscSFSetFromOptions(sf));
1907     PetscCall(PetscArrayzero(o_nnz, na));
1908     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1909     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1910     PetscCall(PetscSFDestroy(&sf));
1911 
1912     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1913     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1914     PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
1915     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1916     PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
1917     PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
1918   } else {
1919     B = *matout;
1920     PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
1921   }
1922 
1923   b           = (Mat_MPIAIJ *)B->data;
1924   A_diag      = a->A;
1925   B_diag      = &b->A;
1926   sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
1927   A_diag_ncol = A_diag->cmap->N;
1928   B_diag_ilen = sub_B_diag->ilen;
1929   B_diag_i    = sub_B_diag->i;
1930 
1931   /* Set ilen for diagonal of B */
1932   for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];
1933 
1934   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1935   very quickly (=without using MatSetValues), because all writes are local. */
1936   PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
1937   PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));
1938 
1939   /* copy over the B part */
1940   PetscCall(PetscMalloc1(bi[mb], &cols));
1941   PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
1942   pbv = bv;
1943   row = A->rmap->rstart;
1944   for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
1945   cols_tmp = cols;
1946   for (i = 0; i < mb; i++) {
1947     ncol = bi[i + 1] - bi[i];
1948     PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
1949     row++;
1950     if (pbv) pbv += ncol;
1951     if (cols_tmp) cols_tmp += ncol;
1952   }
1953   PetscCall(PetscFree(cols));
1954   PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));
1955 
1956   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1957   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1958   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1959     *matout = B;
1960   } else {
1961     PetscCall(MatHeaderMerge(A, &B));
1962   }
1963   PetscFunctionReturn(PETSC_SUCCESS);
1964 }
1965 
1966 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
1967 {
1968   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1969   Mat         a = aij->A, b = aij->B;
1970   PetscInt    s1, s2, s3;
1971 
1972   PetscFunctionBegin;
1973   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1974   if (rr) {
1975     PetscCall(VecGetLocalSize(rr, &s1));
1976     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1977     /* Overlap communication with computation. */
1978     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1979   }
1980   if (ll) {
1981     PetscCall(VecGetLocalSize(ll, &s1));
1982     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1983     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1984   }
1985   /* scale  the diagonal block */
1986   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1987 
1988   if (rr) {
1989     /* Do a scatter end and then right scale the off-diagonal block */
1990     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1991     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
1992   }
1993   PetscFunctionReturn(PETSC_SUCCESS);
1994 }
1995 
1996 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1997 {
1998   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1999 
2000   PetscFunctionBegin;
2001   PetscCall(MatSetUnfactored(a->A));
2002   PetscFunctionReturn(PETSC_SUCCESS);
2003 }
2004 
2005 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2006 {
2007   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2008   Mat         a, b, c, d;
2009   PetscBool   flg;
2010 
2011   PetscFunctionBegin;
2012   a = matA->A;
2013   b = matA->B;
2014   c = matB->A;
2015   d = matB->B;
2016 
2017   PetscCall(MatEqual(a, c, &flg));
2018   if (flg) PetscCall(MatEqual(b, d, &flg));
2019   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2020   PetscFunctionReturn(PETSC_SUCCESS);
2021 }
2022 
2023 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2024 {
2025   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2026   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2027 
2028   PetscFunctionBegin;
2029   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2030   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2031     /* because of the column compression in the off-processor part of the matrix a->B,
2032        the number of columns in a->B and b->B may be different, hence we cannot call
2033        the MatCopy() directly on the two parts. If need be, we can provide a more
2034        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2035        then copying the submatrices */
2036     PetscCall(MatCopy_Basic(A, B, str));
2037   } else {
2038     PetscCall(MatCopy(a->A, b->A, str));
2039     PetscCall(MatCopy(a->B, b->B, str));
2040   }
2041   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2042   PetscFunctionReturn(PETSC_SUCCESS);
2043 }
2044 
2045 /*
2046    Computes the number of nonzeros per row needed for preallocation when X and Y
2047    have different nonzero structure.
2048 */
2049 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2050 {
2051   PetscInt i, j, k, nzx, nzy;
2052 
2053   PetscFunctionBegin;
2054   /* Set the number of nonzeros in the new matrix */
2055   for (i = 0; i < m; i++) {
2056     const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]);
2057     nzx    = xi[i + 1] - xi[i];
2058     nzy    = yi[i + 1] - yi[i];
2059     nnz[i] = 0;
2060     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2061       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2062       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2063       nnz[i]++;
2064     }
2065     for (; k < nzy; k++) nnz[i]++;
2066   }
2067   PetscFunctionReturn(PETSC_SUCCESS);
2068 }
2069 
2070 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2071 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2072 {
2073   PetscInt    m = Y->rmap->N;
2074   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2075   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2076 
2077   PetscFunctionBegin;
2078   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2079   PetscFunctionReturn(PETSC_SUCCESS);
2080 }
2081 
2082 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2083 {
2084   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;
2085 
2086   PetscFunctionBegin;
2087   if (str == SAME_NONZERO_PATTERN) {
2088     PetscCall(MatAXPY(yy->A, a, xx->A, str));
2089     PetscCall(MatAXPY(yy->B, a, xx->B, str));
2090   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2091     PetscCall(MatAXPY_Basic(Y, a, X, str));
2092   } else {
2093     Mat       B;
2094     PetscInt *nnz_d, *nnz_o;
2095 
2096     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
2097     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
2098     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2099     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2100     PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
2101     PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
2102     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
2103     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
2104     PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
2105     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2106     PetscCall(MatHeaderMerge(Y, &B));
2107     PetscCall(PetscFree(nnz_d));
2108     PetscCall(PetscFree(nnz_o));
2109   }
2110   PetscFunctionReturn(PETSC_SUCCESS);
2111 }
2112 
2113 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2114 
2115 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2116 {
2117   PetscFunctionBegin;
2118   if (PetscDefined(USE_COMPLEX)) {
2119     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2120 
2121     PetscCall(MatConjugate_SeqAIJ(aij->A));
2122     PetscCall(MatConjugate_SeqAIJ(aij->B));
2123   }
2124   PetscFunctionReturn(PETSC_SUCCESS);
2125 }
2126 
2127 static PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2128 {
2129   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2130 
2131   PetscFunctionBegin;
2132   PetscCall(MatRealPart(a->A));
2133   PetscCall(MatRealPart(a->B));
2134   PetscFunctionReturn(PETSC_SUCCESS);
2135 }
2136 
2137 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2138 {
2139   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2140 
2141   PetscFunctionBegin;
2142   PetscCall(MatImaginaryPart(a->A));
2143   PetscCall(MatImaginaryPart(a->B));
2144   PetscFunctionReturn(PETSC_SUCCESS);
2145 }
2146 
2147 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2148 {
2149   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2150   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2151   PetscScalar       *va, *vv;
2152   Vec                vB, vA;
2153   const PetscScalar *vb;
2154 
2155   PetscFunctionBegin;
2156   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2157   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2158 
2159   PetscCall(VecGetArrayWrite(vA, &va));
2160   if (idx) {
2161     for (i = 0; i < m; i++) {
2162       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2163     }
2164   }
2165 
2166   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2167   PetscCall(PetscMalloc1(m, &idxb));
2168   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2169 
2170   PetscCall(VecGetArrayWrite(v, &vv));
2171   PetscCall(VecGetArrayRead(vB, &vb));
2172   for (i = 0; i < m; i++) {
2173     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2174       vv[i] = vb[i];
2175       if (idx) idx[i] = a->garray[idxb[i]];
2176     } else {
2177       vv[i] = va[i];
2178       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2179     }
2180   }
2181   PetscCall(VecRestoreArrayWrite(vA, &vv));
2182   PetscCall(VecRestoreArrayWrite(vA, &va));
2183   PetscCall(VecRestoreArrayRead(vB, &vb));
2184   PetscCall(PetscFree(idxb));
2185   PetscCall(VecDestroy(&vA));
2186   PetscCall(VecDestroy(&vB));
2187   PetscFunctionReturn(PETSC_SUCCESS);
2188 }
2189 
2190 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v)
2191 {
2192   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2193   Vec         vB, vA;
2194 
2195   PetscFunctionBegin;
2196   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2197   PetscCall(MatGetRowSumAbs(a->A, vA));
2198   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2199   PetscCall(MatGetRowSumAbs(a->B, vB));
2200   PetscCall(VecAXPY(vA, 1.0, vB));
2201   PetscCall(VecDestroy(&vB));
2202   PetscCall(VecCopy(vA, v));
2203   PetscCall(VecDestroy(&vA));
2204   PetscFunctionReturn(PETSC_SUCCESS);
2205 }
2206 
2207 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2208 {
2209   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2210   PetscInt           m = A->rmap->n, n = A->cmap->n;
2211   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2212   PetscInt          *cmap = mat->garray;
2213   PetscInt          *diagIdx, *offdiagIdx;
2214   Vec                diagV, offdiagV;
2215   PetscScalar       *a, *diagA, *offdiagA;
2216   const PetscScalar *ba, *bav;
2217   PetscInt           r, j, col, ncols, *bi, *bj;
2218   Mat                B = mat->B;
2219   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2220 
2221   PetscFunctionBegin;
2222   /* When a process holds entire A and other processes have no entry */
2223   if (A->cmap->N == n) {
2224     PetscCall(VecGetArrayWrite(v, &diagA));
2225     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2226     PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
2227     PetscCall(VecDestroy(&diagV));
2228     PetscCall(VecRestoreArrayWrite(v, &diagA));
2229     PetscFunctionReturn(PETSC_SUCCESS);
2230   } else if (n == 0) {
2231     if (m) {
2232       PetscCall(VecGetArrayWrite(v, &a));
2233       for (r = 0; r < m; r++) {
2234         a[r] = 0.0;
2235         if (idx) idx[r] = -1;
2236       }
2237       PetscCall(VecRestoreArrayWrite(v, &a));
2238     }
2239     PetscFunctionReturn(PETSC_SUCCESS);
2240   }
2241 
2242   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2243   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2244   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2245   PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));
2246 
2247   /* Get offdiagIdx[] for implicit 0.0 */
2248   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2249   ba = bav;
2250   bi = b->i;
2251   bj = b->j;
2252   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2253   for (r = 0; r < m; r++) {
2254     ncols = bi[r + 1] - bi[r];
2255     if (ncols == A->cmap->N - n) { /* Brow is dense */
2256       offdiagA[r]   = *ba;
2257       offdiagIdx[r] = cmap[0];
2258     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2259       offdiagA[r] = 0.0;
2260 
2261       /* Find first hole in the cmap */
2262       for (j = 0; j < ncols; j++) {
2263         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2264         if (col > j && j < cstart) {
2265           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2266           break;
2267         } else if (col > j + n && j >= cstart) {
2268           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2269           break;
2270         }
2271       }
2272       if (j == ncols && ncols < A->cmap->N - n) {
2273         /* a hole is outside compressed Bcols */
2274         if (ncols == 0) {
2275           if (cstart) {
2276             offdiagIdx[r] = 0;
2277           } else offdiagIdx[r] = cend;
2278         } else { /* ncols > 0 */
2279           offdiagIdx[r] = cmap[ncols - 1] + 1;
2280           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2281         }
2282       }
2283     }
2284 
2285     for (j = 0; j < ncols; j++) {
2286       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
2287         offdiagA[r]   = *ba;
2288         offdiagIdx[r] = cmap[*bj];
2289       }
2290       ba++;
2291       bj++;
2292     }
2293   }
2294 
2295   PetscCall(VecGetArrayWrite(v, &a));
2296   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2297   for (r = 0; r < m; ++r) {
2298     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2299       a[r] = diagA[r];
2300       if (idx) idx[r] = cstart + diagIdx[r];
2301     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2302       a[r] = diagA[r];
2303       if (idx) {
2304         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2305           idx[r] = cstart + diagIdx[r];
2306         } else idx[r] = offdiagIdx[r];
2307       }
2308     } else {
2309       a[r] = offdiagA[r];
2310       if (idx) idx[r] = offdiagIdx[r];
2311     }
2312   }
2313   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2314   PetscCall(VecRestoreArrayWrite(v, &a));
2315   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2316   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2317   PetscCall(VecDestroy(&diagV));
2318   PetscCall(VecDestroy(&offdiagV));
2319   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2320   PetscFunctionReturn(PETSC_SUCCESS);
2321 }
2322 
2323 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2324 {
2325   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2326   PetscInt           m = A->rmap->n, n = A->cmap->n;
2327   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2328   PetscInt          *cmap = mat->garray;
2329   PetscInt          *diagIdx, *offdiagIdx;
2330   Vec                diagV, offdiagV;
2331   PetscScalar       *a, *diagA, *offdiagA;
2332   const PetscScalar *ba, *bav;
2333   PetscInt           r, j, col, ncols, *bi, *bj;
2334   Mat                B = mat->B;
2335   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2336 
2337   PetscFunctionBegin;
2338   /* When a process holds entire A and other processes have no entry */
2339   if (A->cmap->N == n) {
2340     PetscCall(VecGetArrayWrite(v, &diagA));
2341     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2342     PetscCall(MatGetRowMin(mat->A, diagV, idx));
2343     PetscCall(VecDestroy(&diagV));
2344     PetscCall(VecRestoreArrayWrite(v, &diagA));
2345     PetscFunctionReturn(PETSC_SUCCESS);
2346   } else if (n == 0) {
2347     if (m) {
2348       PetscCall(VecGetArrayWrite(v, &a));
2349       for (r = 0; r < m; r++) {
2350         a[r] = PETSC_MAX_REAL;
2351         if (idx) idx[r] = -1;
2352       }
2353       PetscCall(VecRestoreArrayWrite(v, &a));
2354     }
2355     PetscFunctionReturn(PETSC_SUCCESS);
2356   }
2357 
2358   PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
2359   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2360   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2361   PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));
2362 
2363   /* Get offdiagIdx[] for implicit 0.0 */
2364   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2365   ba = bav;
2366   bi = b->i;
2367   bj = b->j;
2368   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2369   for (r = 0; r < m; r++) {
2370     ncols = bi[r + 1] - bi[r];
2371     if (ncols == A->cmap->N - n) { /* Brow is dense */
2372       offdiagA[r]   = *ba;
2373       offdiagIdx[r] = cmap[0];
2374     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2375       offdiagA[r] = 0.0;
2376 
2377       /* Find first hole in the cmap */
2378       for (j = 0; j < ncols; j++) {
2379         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2380         if (col > j && j < cstart) {
2381           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2382           break;
2383         } else if (col > j + n && j >= cstart) {
2384           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2385           break;
2386         }
2387       }
2388       if (j == ncols && ncols < A->cmap->N - n) {
2389         /* a hole is outside compressed Bcols */
2390         if (ncols == 0) {
2391           if (cstart) {
2392             offdiagIdx[r] = 0;
2393           } else offdiagIdx[r] = cend;
2394         } else { /* ncols > 0 */
2395           offdiagIdx[r] = cmap[ncols - 1] + 1;
2396           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2397         }
2398       }
2399     }
2400 
2401     for (j = 0; j < ncols; j++) {
2402       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
2403         offdiagA[r]   = *ba;
2404         offdiagIdx[r] = cmap[*bj];
2405       }
2406       ba++;
2407       bj++;
2408     }
2409   }
2410 
2411   PetscCall(VecGetArrayWrite(v, &a));
2412   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2413   for (r = 0; r < m; ++r) {
2414     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2415       a[r] = diagA[r];
2416       if (idx) idx[r] = cstart + diagIdx[r];
2417     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2418       a[r] = diagA[r];
2419       if (idx) {
2420         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2421           idx[r] = cstart + diagIdx[r];
2422         } else idx[r] = offdiagIdx[r];
2423       }
2424     } else {
2425       a[r] = offdiagA[r];
2426       if (idx) idx[r] = offdiagIdx[r];
2427     }
2428   }
2429   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2430   PetscCall(VecRestoreArrayWrite(v, &a));
2431   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2432   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2433   PetscCall(VecDestroy(&diagV));
2434   PetscCall(VecDestroy(&offdiagV));
2435   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2436   PetscFunctionReturn(PETSC_SUCCESS);
2437 }
2438 
2439 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2440 {
2441   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2442   PetscInt           m = A->rmap->n, n = A->cmap->n;
2443   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2444   PetscInt          *cmap = mat->garray;
2445   PetscInt          *diagIdx, *offdiagIdx;
2446   Vec                diagV, offdiagV;
2447   PetscScalar       *a, *diagA, *offdiagA;
2448   const PetscScalar *ba, *bav;
2449   PetscInt           r, j, col, ncols, *bi, *bj;
2450   Mat                B = mat->B;
2451   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2452 
2453   PetscFunctionBegin;
2454   /* When a process holds entire A and other processes have no entry */
2455   if (A->cmap->N == n) {
2456     PetscCall(VecGetArrayWrite(v, &diagA));
2457     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2458     PetscCall(MatGetRowMax(mat->A, diagV, idx));
2459     PetscCall(VecDestroy(&diagV));
2460     PetscCall(VecRestoreArrayWrite(v, &diagA));
2461     PetscFunctionReturn(PETSC_SUCCESS);
2462   } else if (n == 0) {
2463     if (m) {
2464       PetscCall(VecGetArrayWrite(v, &a));
2465       for (r = 0; r < m; r++) {
2466         a[r] = PETSC_MIN_REAL;
2467         if (idx) idx[r] = -1;
2468       }
2469       PetscCall(VecRestoreArrayWrite(v, &a));
2470     }
2471     PetscFunctionReturn(PETSC_SUCCESS);
2472   }
2473 
2474   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2475   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2476   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2477   PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));
2478 
2479   /* Get offdiagIdx[] for implicit 0.0 */
2480   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2481   ba = bav;
2482   bi = b->i;
2483   bj = b->j;
2484   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2485   for (r = 0; r < m; r++) {
2486     ncols = bi[r + 1] - bi[r];
2487     if (ncols == A->cmap->N - n) { /* Brow is dense */
2488       offdiagA[r]   = *ba;
2489       offdiagIdx[r] = cmap[0];
2490     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2491       offdiagA[r] = 0.0;
2492 
2493       /* Find first hole in the cmap */
2494       for (j = 0; j < ncols; j++) {
2495         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2496         if (col > j && j < cstart) {
2497           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2498           break;
2499         } else if (col > j + n && j >= cstart) {
2500           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2501           break;
2502         }
2503       }
2504       if (j == ncols && ncols < A->cmap->N - n) {
2505         /* a hole is outside compressed Bcols */
2506         if (ncols == 0) {
2507           if (cstart) {
2508             offdiagIdx[r] = 0;
2509           } else offdiagIdx[r] = cend;
2510         } else { /* ncols > 0 */
2511           offdiagIdx[r] = cmap[ncols - 1] + 1;
2512           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2513         }
2514       }
2515     }
2516 
2517     for (j = 0; j < ncols; j++) {
2518       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
2519         offdiagA[r]   = *ba;
2520         offdiagIdx[r] = cmap[*bj];
2521       }
2522       ba++;
2523       bj++;
2524     }
2525   }
2526 
2527   PetscCall(VecGetArrayWrite(v, &a));
2528   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2529   for (r = 0; r < m; ++r) {
2530     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2531       a[r] = diagA[r];
2532       if (idx) idx[r] = cstart + diagIdx[r];
2533     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2534       a[r] = diagA[r];
2535       if (idx) {
2536         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2537           idx[r] = cstart + diagIdx[r];
2538         } else idx[r] = offdiagIdx[r];
2539       }
2540     } else {
2541       a[r] = offdiagA[r];
2542       if (idx) idx[r] = offdiagIdx[r];
2543     }
2544   }
2545   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2546   PetscCall(VecRestoreArrayWrite(v, &a));
2547   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2548   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2549   PetscCall(VecDestroy(&diagV));
2550   PetscCall(VecDestroy(&offdiagV));
2551   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2552   PetscFunctionReturn(PETSC_SUCCESS);
2553 }
2554 
2555 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2556 {
2557   Mat *dummy;
2558 
2559   PetscFunctionBegin;
2560   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2561   *newmat = *dummy;
2562   PetscCall(PetscFree(dummy));
2563   PetscFunctionReturn(PETSC_SUCCESS);
2564 }
2565 
2566 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2567 {
2568   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2569 
2570   PetscFunctionBegin;
2571   PetscCall(MatInvertBlockDiagonal(a->A, values));
2572   A->factorerrortype = a->A->factorerrortype;
2573   PetscFunctionReturn(PETSC_SUCCESS);
2574 }
2575 
2576 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2577 {
2578   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2579 
2580   PetscFunctionBegin;
2581   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2582   PetscCall(MatSetRandom(aij->A, rctx));
2583   if (x->assembled) {
2584     PetscCall(MatSetRandom(aij->B, rctx));
2585   } else {
2586     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2587   }
2588   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2589   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2590   PetscFunctionReturn(PETSC_SUCCESS);
2591 }
2592 
2593 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2594 {
2595   PetscFunctionBegin;
2596   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2597   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2598   PetscFunctionReturn(PETSC_SUCCESS);
2599 }
2600 
2601 /*@
2602   MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2603 
2604   Not Collective
2605 
2606   Input Parameter:
2607 . A - the matrix
2608 
2609   Output Parameter:
2610 . nz - the number of nonzeros
2611 
2612   Level: advanced
2613 
2614 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2615 @*/
2616 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2617 {
2618   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2619   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2620   PetscBool   isaij;
2621 
2622   PetscFunctionBegin;
2623   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
2624   PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
2625   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2626   PetscFunctionReturn(PETSC_SUCCESS);
2627 }
2628 
2629 /*@
2630   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2631 
2632   Collective
2633 
2634   Input Parameters:
2635 + A  - the matrix
2636 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)
2637 
2638   Level: advanced
2639 
2640 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2641 @*/
2642 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
2643 {
2644   PetscFunctionBegin;
2645   PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
2646   PetscFunctionReturn(PETSC_SUCCESS);
2647 }
2648 
2649 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
2650 {
2651   PetscBool sc = PETSC_FALSE, flg;
2652 
2653   PetscFunctionBegin;
2654   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2655   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2656   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2657   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2658   PetscOptionsHeadEnd();
2659   PetscFunctionReturn(PETSC_SUCCESS);
2660 }
2661 
2662 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
2663 {
2664   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
2665   Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;
2666 
2667   PetscFunctionBegin;
2668   if (!Y->preallocated) {
2669     PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
2670   } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
2671     PetscInt nonew = aij->nonew;
2672     PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
2673     aij->nonew = nonew;
2674   }
2675   PetscCall(MatShift_Basic(Y, a));
2676   PetscFunctionReturn(PETSC_SUCCESS);
2677 }
2678 
2679 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2680 {
2681   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2682 
2683   PetscFunctionBegin;
2684   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2685   PetscCall(MatMissingDiagonal(a->A, missing, d));
2686   if (d) {
2687     PetscInt rstart;
2688     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2689     *d += rstart;
2690   }
2691   PetscFunctionReturn(PETSC_SUCCESS);
2692 }
2693 
2694 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2695 {
2696   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2697 
2698   PetscFunctionBegin;
2699   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2700   PetscFunctionReturn(PETSC_SUCCESS);
2701 }
2702 
2703 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
2704 {
2705   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2706 
2707   PetscFunctionBegin;
2708   PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2709   PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2710   PetscFunctionReturn(PETSC_SUCCESS);
2711 }
2712 
/*
  Table of function pointers for the operations implemented in this file.

  The initializer is positional: the N-th value fills the N-th member of struct _MatOps, so entries
  must never be reordered, inserted or removed without matching the struct declaration. The numeric
  markers (4, 10, 15, ...) give the zero-based slot of the entry that follows them and exist only as
  a counting aid.

  NOTE(review): a NULL entry presumably means this type supplies no implementation for that slot -
  confirm against the declaration of struct _MatOps.
*/
2713 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2714                                        MatGetRow_MPIAIJ,
2715                                        MatRestoreRow_MPIAIJ,
2716                                        MatMult_MPIAIJ,
2717                                        /* 4*/ MatMultAdd_MPIAIJ,
2718                                        MatMultTranspose_MPIAIJ,
2719                                        MatMultTransposeAdd_MPIAIJ,
2720                                        NULL,
2721                                        NULL,
2722                                        NULL,
2723                                        /*10*/ NULL,
2724                                        NULL,
2725                                        NULL,
2726                                        MatSOR_MPIAIJ,
2727                                        MatTranspose_MPIAIJ,
2728                                        /*15*/ MatGetInfo_MPIAIJ,
2729                                        MatEqual_MPIAIJ,
2730                                        MatGetDiagonal_MPIAIJ,
2731                                        MatDiagonalScale_MPIAIJ,
2732                                        MatNorm_MPIAIJ,
2733                                        /*20*/ MatAssemblyBegin_MPIAIJ,
2734                                        MatAssemblyEnd_MPIAIJ,
2735                                        MatSetOption_MPIAIJ,
2736                                        MatZeroEntries_MPIAIJ,
2737                                        /*24*/ MatZeroRows_MPIAIJ,
2738                                        NULL,
2739                                        NULL,
2740                                        NULL,
2741                                        NULL,
2742                                        /*29*/ MatSetUp_MPI_Hash,
2743                                        NULL,
2744                                        NULL,
2745                                        MatGetDiagonalBlock_MPIAIJ,
2746                                        NULL,
2747                                        /*34*/ MatDuplicate_MPIAIJ,
2748                                        NULL,
2749                                        NULL,
2750                                        NULL,
2751                                        NULL,
2752                                        /*39*/ MatAXPY_MPIAIJ,
2753                                        MatCreateSubMatrices_MPIAIJ,
2754                                        MatIncreaseOverlap_MPIAIJ,
2755                                        MatGetValues_MPIAIJ,
2756                                        MatCopy_MPIAIJ,
2757                                        /*44*/ MatGetRowMax_MPIAIJ,
2758                                        MatScale_MPIAIJ,
2759                                        MatShift_MPIAIJ,
2760                                        MatDiagonalSet_MPIAIJ,
2761                                        MatZeroRowsColumns_MPIAIJ,
2762                                        /*49*/ MatSetRandom_MPIAIJ,
2763                                        MatGetRowIJ_MPIAIJ,
2764                                        MatRestoreRowIJ_MPIAIJ,
2765                                        NULL,
2766                                        NULL,
2767                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2768                                        NULL,
2769                                        MatSetUnfactored_MPIAIJ,
2770                                        MatPermute_MPIAIJ,
2771                                        NULL,
2772                                        /*59*/ MatCreateSubMatrix_MPIAIJ,
2773                                        MatDestroy_MPIAIJ,
2774                                        MatView_MPIAIJ,
2775                                        NULL,
2776                                        NULL,
2777                                        /*64*/ NULL,
2778                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2779                                        NULL,
2780                                        NULL,
2781                                        NULL,
2782                                        /*69*/ MatGetRowMaxAbs_MPIAIJ,
2783                                        MatGetRowMinAbs_MPIAIJ,
2784                                        NULL,
2785                                        NULL,
2786                                        NULL,
2787                                        NULL,
2788                                        /*75*/ MatFDColoringApply_AIJ,
2789                                        MatSetFromOptions_MPIAIJ,
2790                                        NULL,
2791                                        NULL,
2792                                        MatFindZeroDiagonals_MPIAIJ,
2793                                        /*80*/ NULL,
2794                                        NULL,
2795                                        NULL,
2796                                        /*83*/ MatLoad_MPIAIJ,
2797                                        NULL,
2798                                        NULL,
2799                                        NULL,
2800                                        NULL,
2801                                        NULL,
2802                                        /*89*/ NULL,
2803                                        NULL,
2804                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2805                                        NULL,
2806                                        NULL,
2807                                        /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2808                                        NULL,
2809                                        NULL,
2810                                        NULL,
2811                                        MatBindToCPU_MPIAIJ,
2812                                        /*99*/ MatProductSetFromOptions_MPIAIJ,
2813                                        NULL,
2814                                        NULL,
2815                                        MatConjugate_MPIAIJ,
2816                                        NULL,
2817                                        /*104*/ MatSetValuesRow_MPIAIJ,
2818                                        MatRealPart_MPIAIJ,
2819                                        MatImaginaryPart_MPIAIJ,
2820                                        NULL,
2821                                        NULL,
2822                                        /*109*/ NULL,
2823                                        NULL,
2824                                        MatGetRowMin_MPIAIJ,
2825                                        NULL,
2826                                        MatMissingDiagonal_MPIAIJ,
2827                                        /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
2828                                        NULL,
2829                                        MatGetGhosts_MPIAIJ,
2830                                        NULL,
2831                                        NULL,
2832                                        /*119*/ MatMultDiagonalBlock_MPIAIJ,
2833                                        NULL,
2834                                        NULL,
2835                                        NULL,
2836                                        MatGetMultiProcBlock_MPIAIJ,
2837                                        /*124*/ MatFindNonzeroRows_MPIAIJ,
2838                                        MatGetColumnReductions_MPIAIJ,
2839                                        MatInvertBlockDiagonal_MPIAIJ,
2840                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2841                                        MatCreateSubMatricesMPI_MPIAIJ,
2842                                        /*129*/ NULL,
2843                                        NULL,
2844                                        NULL,
2845                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2846                                        NULL,
2847                                        /*134*/ NULL,
2848                                        NULL,
2849                                        NULL,
2850                                        NULL,
2851                                        NULL,
2852                                        /*139*/ MatSetBlockSizes_MPIAIJ,
2853                                        NULL,
2854                                        NULL,
2855                                        MatFDColoringSetUp_MPIXAIJ,
2856                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2857                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2858                                        /*145*/ NULL,
2859                                        NULL,
2860                                        NULL,
2861                                        MatCreateGraph_Simple_AIJ,
2862                                        NULL,
2863                                        /*150*/ NULL,
2864                                        MatEliminateZeros_MPIAIJ,
2865                                        MatGetRowSumAbs_MPIAIJ,
2866                                        NULL};
2867 
2868 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2869 {
2870   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2871 
2872   PetscFunctionBegin;
2873   PetscCall(MatStoreValues(aij->A));
2874   PetscCall(MatStoreValues(aij->B));
2875   PetscFunctionReturn(PETSC_SUCCESS);
2876 }
2877 
2878 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2879 {
2880   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2881 
2882   PetscFunctionBegin;
2883   PetscCall(MatRetrieveValues(aij->A));
2884   PetscCall(MatRetrieveValues(aij->B));
2885   PetscFunctionReturn(PETSC_SUCCESS);
2886 }
2887 
2888 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2889 {
2890   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2891   PetscMPIInt size;
2892 
2893   PetscFunctionBegin;
2894   if (B->hash_active) {
2895     B->ops[0]      = b->cops;
2896     B->hash_active = PETSC_FALSE;
2897   }
2898   PetscCall(PetscLayoutSetUp(B->rmap));
2899   PetscCall(PetscLayoutSetUp(B->cmap));
2900 
2901 #if defined(PETSC_USE_CTABLE)
2902   PetscCall(PetscHMapIDestroy(&b->colmap));
2903 #else
2904   PetscCall(PetscFree(b->colmap));
2905 #endif
2906   PetscCall(PetscFree(b->garray));
2907   PetscCall(VecDestroy(&b->lvec));
2908   PetscCall(VecScatterDestroy(&b->Mvctx));
2909 
2910   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2911 
2912   MatSeqXAIJGetOptions_Private(b->B);
2913   PetscCall(MatDestroy(&b->B));
2914   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2915   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2916   PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
2917   PetscCall(MatSetType(b->B, MATSEQAIJ));
2918   MatSeqXAIJRestoreOptions_Private(b->B);
2919 
2920   MatSeqXAIJGetOptions_Private(b->A);
2921   PetscCall(MatDestroy(&b->A));
2922   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2923   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2924   PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
2925   PetscCall(MatSetType(b->A, MATSEQAIJ));
2926   MatSeqXAIJRestoreOptions_Private(b->A);
2927 
2928   PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
2929   PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
2930   B->preallocated  = PETSC_TRUE;
2931   B->was_assembled = PETSC_FALSE;
2932   B->assembled     = PETSC_FALSE;
2933   PetscFunctionReturn(PETSC_SUCCESS);
2934 }
2935 
2936 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2937 {
2938   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2939 
2940   PetscFunctionBegin;
2941   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2942   PetscCall(PetscLayoutSetUp(B->rmap));
2943   PetscCall(PetscLayoutSetUp(B->cmap));
2944 
2945 #if defined(PETSC_USE_CTABLE)
2946   PetscCall(PetscHMapIDestroy(&b->colmap));
2947 #else
2948   PetscCall(PetscFree(b->colmap));
2949 #endif
2950   PetscCall(PetscFree(b->garray));
2951   PetscCall(VecDestroy(&b->lvec));
2952   PetscCall(VecScatterDestroy(&b->Mvctx));
2953 
2954   PetscCall(MatResetPreallocation(b->A));
2955   PetscCall(MatResetPreallocation(b->B));
2956   B->preallocated  = PETSC_TRUE;
2957   B->was_assembled = PETSC_FALSE;
2958   B->assembled     = PETSC_FALSE;
2959   PetscFunctionReturn(PETSC_SUCCESS);
2960 }
2961 
2962 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2963 {
2964   Mat         mat;
2965   Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;
2966 
2967   PetscFunctionBegin;
2968   *newmat = NULL;
2969   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2970   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2971   PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
2972   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2973   a = (Mat_MPIAIJ *)mat->data;
2974 
2975   mat->factortype = matin->factortype;
2976   mat->assembled  = matin->assembled;
2977   mat->insertmode = NOT_SET_VALUES;
2978 
2979   a->size         = oldmat->size;
2980   a->rank         = oldmat->rank;
2981   a->donotstash   = oldmat->donotstash;
2982   a->roworiented  = oldmat->roworiented;
2983   a->rowindices   = NULL;
2984   a->rowvalues    = NULL;
2985   a->getrowactive = PETSC_FALSE;
2986 
2987   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
2988   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2989   if (matin->hash_active) {
2990     PetscCall(MatSetUp(mat));
2991   } else {
2992     mat->preallocated = matin->preallocated;
2993     if (oldmat->colmap) {
2994 #if defined(PETSC_USE_CTABLE)
2995       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
2996 #else
2997       PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
2998       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
2999 #endif
3000     } else a->colmap = NULL;
3001     if (oldmat->garray) {
3002       PetscInt len;
3003       len = oldmat->B->cmap->n;
3004       PetscCall(PetscMalloc1(len + 1, &a->garray));
3005       if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3006     } else a->garray = NULL;
3007 
3008     /* It may happen MatDuplicate is called with a non-assembled matrix
3009       In fact, MatDuplicate only requires the matrix to be preallocated
3010       This may happen inside a DMCreateMatrix_Shell */
3011     if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3012     if (oldmat->Mvctx) {
3013       a->Mvctx = oldmat->Mvctx;
3014       PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx));
3015     }
3016     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3017     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3018   }
3019   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3020   *newmat = mat;
3021   PetscFunctionReturn(PETSC_SUCCESS);
3022 }
3023 
3024 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3025 {
3026   PetscBool isbinary, ishdf5;
3027 
3028   PetscFunctionBegin;
3029   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3030   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3031   /* force binary viewer to load .info file if it has not yet done so */
3032   PetscCall(PetscViewerSetUp(viewer));
3033   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3034   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3035   if (isbinary) {
3036     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3037   } else if (ishdf5) {
3038 #if defined(PETSC_HAVE_HDF5)
3039     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3040 #else
3041     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3042 #endif
3043   } else {
3044     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3045   }
3046   PetscFunctionReturn(PETSC_SUCCESS);
3047 }
3048 
3049 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3050 {
3051   PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
3052   PetscInt    *rowidxs, *colidxs;
3053   PetscScalar *matvals;
3054 
3055   PetscFunctionBegin;
3056   PetscCall(PetscViewerSetUp(viewer));
3057 
3058   /* read in matrix header */
3059   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3060   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3061   M  = header[1];
3062   N  = header[2];
3063   nz = header[3];
3064   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3065   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3066   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");
3067 
3068   /* set block sizes from the viewer's .info file */
3069   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3070   /* set global sizes if not set already */
3071   if (mat->rmap->N < 0) mat->rmap->N = M;
3072   if (mat->cmap->N < 0) mat->cmap->N = N;
3073   PetscCall(PetscLayoutSetUp(mat->rmap));
3074   PetscCall(PetscLayoutSetUp(mat->cmap));
3075 
3076   /* check if the matrix sizes are correct */
3077   PetscCall(MatGetSize(mat, &rows, &cols));
3078   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3079 
3080   /* read in row lengths and build row indices */
3081   PetscCall(MatGetLocalSize(mat, &m, NULL));
3082   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3083   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3084   rowidxs[0] = 0;
3085   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3086   if (nz != PETSC_MAX_INT) {
3087     PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3088     PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3089   }
3090 
3091   /* read in column indices and matrix values */
3092   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3093   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3094   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3095   /* store matrix indices and values */
3096   PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
3097   PetscCall(PetscFree(rowidxs));
3098   PetscCall(PetscFree2(colidxs, matvals));
3099   PetscFunctionReturn(PETSC_SUCCESS);
3100 }
3101 
3102 /* Not scalable because of ISAllGather() unless getting all columns. */
3103 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3104 {
3105   IS          iscol_local;
3106   PetscBool   isstride;
3107   PetscMPIInt lisstride = 0, gisstride;
3108 
3109   PetscFunctionBegin;
3110   /* check if we are grabbing all columns*/
3111   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3112 
3113   if (isstride) {
3114     PetscInt start, len, mstart, mlen;
3115     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3116     PetscCall(ISGetLocalSize(iscol, &len));
3117     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3118     if (mstart == start && mlen - mstart == len) lisstride = 1;
3119   }
3120 
3121   PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3122   if (gisstride) {
3123     PetscInt N;
3124     PetscCall(MatGetSize(mat, NULL, &N));
3125     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3126     PetscCall(ISSetIdentity(iscol_local));
3127     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3128   } else {
3129     PetscInt cbs;
3130     PetscCall(ISGetBlockSize(iscol, &cbs));
3131     PetscCall(ISAllGather(iscol, &iscol_local));
3132     PetscCall(ISSetBlockSize(iscol_local, cbs));
3133   }
3134 
3135   *isseq = iscol_local;
3136   PetscFunctionReturn(PETSC_SUCCESS);
3137 }
3138 
3139 /*
3140  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3141  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3142 
3143  Input Parameters:
3144 +   mat - matrix
3145 .   isrow - parallel row index set; its local indices are a subset of local columns of `mat`,
3146            i.e., mat->rstart <= isrow[i] < mat->rend
3147 -   iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
3148            i.e., mat->cstart <= iscol[i] < mat->cend
3149 
3150  Output Parameters:
3151 +   isrow_d - sequential row index set for retrieving mat->A
3152 .   iscol_d - sequential  column index set for retrieving mat->A
3153 .   iscol_o - sequential column index set for retrieving mat->B
3154 -   garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
3155  */
3156 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
3157 {
3158   Vec             x, cmap;
3159   const PetscInt *is_idx;
3160   PetscScalar    *xarray, *cmaparray;
3161   PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
3162   Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
3163   Mat             B    = a->B;
3164   Vec             lvec = a->lvec, lcmap;
3165   PetscInt        i, cstart, cend, Bn = B->cmap->N;
3166   MPI_Comm        comm;
3167   VecScatter      Mvctx = a->Mvctx;
3168 
3169   PetscFunctionBegin;
3170   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3171   PetscCall(ISGetLocalSize(iscol, &ncols));
3172 
3173   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3174   PetscCall(MatCreateVecs(mat, &x, NULL));
3175   PetscCall(VecSet(x, -1.0));
3176   PetscCall(VecDuplicate(x, &cmap));
3177   PetscCall(VecSet(cmap, -1.0));
3178 
3179   /* Get start indices */
3180   PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
3181   isstart -= ncols;
3182   PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));
3183 
3184   PetscCall(ISGetIndices(iscol, &is_idx));
3185   PetscCall(VecGetArray(x, &xarray));
3186   PetscCall(VecGetArray(cmap, &cmaparray));
3187   PetscCall(PetscMalloc1(ncols, &idx));
3188   for (i = 0; i < ncols; i++) {
3189     xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
3190     cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
3191     idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
3192   }
3193   PetscCall(VecRestoreArray(x, &xarray));
3194   PetscCall(VecRestoreArray(cmap, &cmaparray));
3195   PetscCall(ISRestoreIndices(iscol, &is_idx));
3196 
3197   /* Get iscol_d */
3198   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
3199   PetscCall(ISGetBlockSize(iscol, &i));
3200   PetscCall(ISSetBlockSize(*iscol_d, i));
3201 
3202   /* Get isrow_d */
3203   PetscCall(ISGetLocalSize(isrow, &m));
3204   rstart = mat->rmap->rstart;
3205   PetscCall(PetscMalloc1(m, &idx));
3206   PetscCall(ISGetIndices(isrow, &is_idx));
3207   for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
3208   PetscCall(ISRestoreIndices(isrow, &is_idx));
3209 
3210   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
3211   PetscCall(ISGetBlockSize(isrow, &i));
3212   PetscCall(ISSetBlockSize(*isrow_d, i));
3213 
3214   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3215   PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3216   PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3217 
3218   PetscCall(VecDuplicate(lvec, &lcmap));
3219 
3220   PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3221   PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3222 
3223   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3224   /* off-process column indices */
3225   count = 0;
3226   PetscCall(PetscMalloc1(Bn, &idx));
3227   PetscCall(PetscMalloc1(Bn, &cmap1));
3228 
3229   PetscCall(VecGetArray(lvec, &xarray));
3230   PetscCall(VecGetArray(lcmap, &cmaparray));
3231   for (i = 0; i < Bn; i++) {
3232     if (PetscRealPart(xarray[i]) > -1.0) {
3233       idx[count]   = i;                                     /* local column index in off-diagonal part B */
3234       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
3235       count++;
3236     }
3237   }
3238   PetscCall(VecRestoreArray(lvec, &xarray));
3239   PetscCall(VecRestoreArray(lcmap, &cmaparray));
3240 
3241   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
3242   /* cannot ensure iscol_o has same blocksize as iscol! */
3243 
3244   PetscCall(PetscFree(idx));
3245   *garray = cmap1;
3246 
3247   PetscCall(VecDestroy(&x));
3248   PetscCall(VecDestroy(&cmap));
3249   PetscCall(VecDestroy(&lcmap));
3250   PetscFunctionReturn(PETSC_SUCCESS);
3251 }
3252 
3253 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3254 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
3255 {
3256   Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
3257   Mat         M = NULL;
3258   MPI_Comm    comm;
3259   IS          iscol_d, isrow_d, iscol_o;
3260   Mat         Asub = NULL, Bsub = NULL;
3261   PetscInt    n;
3262 
3263   PetscFunctionBegin;
3264   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3265 
3266   if (call == MAT_REUSE_MATRIX) {
3267     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3268     PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
3269     PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");
3270 
3271     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
3272     PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");
3273 
3274     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
3275     PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");
3276 
3277     /* Update diagonal and off-diagonal portions of submat */
3278     asub = (Mat_MPIAIJ *)(*submat)->data;
3279     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
3280     PetscCall(ISGetLocalSize(iscol_o, &n));
3281     if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
3282     PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
3283     PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));
3284 
3285   } else { /* call == MAT_INITIAL_MATRIX) */
3286     const PetscInt *garray;
3287     PetscInt        BsubN;
3288 
3289     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3290     PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));
3291 
3292     /* Create local submatrices Asub and Bsub */
3293     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
3294     PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));
3295 
3296     /* Create submatrix M */
3297     PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));
3298 
3299     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3300     asub = (Mat_MPIAIJ *)M->data;
3301 
3302     PetscCall(ISGetLocalSize(iscol_o, &BsubN));
3303     n = asub->B->cmap->N;
3304     if (BsubN > n) {
3305       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3306       const PetscInt *idx;
3307       PetscInt        i, j, *idx_new, *subgarray = asub->garray;
3308       PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));
3309 
3310       PetscCall(PetscMalloc1(n, &idx_new));
3311       j = 0;
3312       PetscCall(ISGetIndices(iscol_o, &idx));
3313       for (i = 0; i < n; i++) {
3314         if (j >= BsubN) break;
3315         while (subgarray[i] > garray[j]) j++;
3316 
3317         if (subgarray[i] == garray[j]) {
3318           idx_new[i] = idx[j++];
3319         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
3320       }
3321       PetscCall(ISRestoreIndices(iscol_o, &idx));
3322 
3323       PetscCall(ISDestroy(&iscol_o));
3324       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));
3325 
3326     } else if (BsubN < n) {
3327       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
3328     }
3329 
3330     PetscCall(PetscFree(garray));
3331     *submat = M;
3332 
3333     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3334     PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
3335     PetscCall(ISDestroy(&isrow_d));
3336 
3337     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
3338     PetscCall(ISDestroy(&iscol_d));
3339 
3340     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
3341     PetscCall(ISDestroy(&iscol_o));
3342   }
3343   PetscFunctionReturn(PETSC_SUCCESS);
3344 }
3345 
3346 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
3347 {
3348   IS        iscol_local = NULL, isrow_d;
3349   PetscInt  csize;
3350   PetscInt  n, i, j, start, end;
3351   PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
3352   MPI_Comm  comm;
3353 
3354   PetscFunctionBegin;
3355   /* If isrow has same processor distribution as mat,
3356      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3357   if (call == MAT_REUSE_MATRIX) {
3358     PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
3359     if (isrow_d) {
3360       sameRowDist  = PETSC_TRUE;
3361       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3362     } else {
3363       PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
3364       if (iscol_local) {
3365         sameRowDist  = PETSC_TRUE;
3366         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3367       }
3368     }
3369   } else {
3370     /* Check if isrow has same processor distribution as mat */
3371     sameDist[0] = PETSC_FALSE;
3372     PetscCall(ISGetLocalSize(isrow, &n));
3373     if (!n) {
3374       sameDist[0] = PETSC_TRUE;
3375     } else {
3376       PetscCall(ISGetMinMax(isrow, &i, &j));
3377       PetscCall(MatGetOwnershipRange(mat, &start, &end));
3378       if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
3379     }
3380 
3381     /* Check if iscol has same processor distribution as mat */
3382     sameDist[1] = PETSC_FALSE;
3383     PetscCall(ISGetLocalSize(iscol, &n));
3384     if (!n) {
3385       sameDist[1] = PETSC_TRUE;
3386     } else {
3387       PetscCall(ISGetMinMax(iscol, &i, &j));
3388       PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
3389       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3390     }
3391 
3392     PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3393     PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
3394     sameRowDist = tsameDist[0];
3395   }
3396 
3397   if (sameRowDist) {
3398     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3399       /* isrow and iscol have same processor distribution as mat */
3400       PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
3401       PetscFunctionReturn(PETSC_SUCCESS);
3402     } else { /* sameRowDist */
3403       /* isrow has same processor distribution as mat */
3404       if (call == MAT_INITIAL_MATRIX) {
3405         PetscBool sorted;
3406         PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3407         PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
3408         PetscCall(ISGetSize(iscol, &i));
3409         PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);
3410 
3411         PetscCall(ISSorted(iscol_local, &sorted));
3412         if (sorted) {
3413           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3414           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
3415           PetscFunctionReturn(PETSC_SUCCESS);
3416         }
3417       } else { /* call == MAT_REUSE_MATRIX */
3418         IS iscol_sub;
3419         PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3420         if (iscol_sub) {
3421           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
3422           PetscFunctionReturn(PETSC_SUCCESS);
3423         }
3424       }
3425     }
3426   }
3427 
3428   /* General case: iscol -> iscol_local which has global size of iscol */
3429   if (call == MAT_REUSE_MATRIX) {
3430     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
3431     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3432   } else {
3433     if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3434   }
3435 
3436   PetscCall(ISGetLocalSize(iscol, &csize));
3437   PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));
3438 
3439   if (call == MAT_INITIAL_MATRIX) {
3440     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3441     PetscCall(ISDestroy(&iscol_local));
3442   }
3443   PetscFunctionReturn(PETSC_SUCCESS);
3444 }
3445 
3446 /*@C
3447   MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3448   and "off-diagonal" part of the matrix in CSR format.
3449 
3450   Collective
3451 
3452   Input Parameters:
3453 + comm   - MPI communicator
3454 . A      - "diagonal" portion of matrix
3455 . B      - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3456 - garray - global index of `B` columns
3457 
3458   Output Parameter:
3459 . mat - the matrix, with input `A` as its local diagonal matrix
3460 
3461   Level: advanced
3462 
3463   Notes:
3464   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3465 
3466   `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore.
3467 
3468 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3469 @*/
3470 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
3471 {
3472   Mat_MPIAIJ        *maij;
3473   Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
3474   PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
3475   const PetscScalar *oa;
3476   Mat                Bnew;
3477   PetscInt           m, n, N;
3478   MatType            mpi_mat_type;
3479 
3480   PetscFunctionBegin;
3481   PetscCall(MatCreate(comm, mat));
3482   PetscCall(MatGetSize(A, &m, &n));
3483   PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
3484   PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
3485   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3486   /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */
3487 
3488   /* Get global columns of mat */
3489   PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));
3490 
3491   PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
3492   /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
3493   PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
3494   PetscCall(MatSetType(*mat, mpi_mat_type));
3495 
3496   if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
3497   maij = (Mat_MPIAIJ *)(*mat)->data;
3498 
3499   (*mat)->preallocated = PETSC_TRUE;
3500 
3501   PetscCall(PetscLayoutSetUp((*mat)->rmap));
3502   PetscCall(PetscLayoutSetUp((*mat)->cmap));
3503 
3504   /* Set A as diagonal portion of *mat */
3505   maij->A = A;
3506 
3507   nz = oi[m];
3508   for (i = 0; i < nz; i++) {
3509     col   = oj[i];
3510     oj[i] = garray[col];
3511   }
3512 
3513   /* Set Bnew as off-diagonal portion of *mat */
3514   PetscCall(MatSeqAIJGetArrayRead(B, &oa));
3515   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
3516   PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
3517   bnew        = (Mat_SeqAIJ *)Bnew->data;
3518   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3519   maij->B     = Bnew;
3520 
3521   PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);
3522 
3523   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3524   b->free_a       = PETSC_FALSE;
3525   b->free_ij      = PETSC_FALSE;
3526   PetscCall(MatDestroy(&B));
3527 
3528   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3529   bnew->free_a       = PETSC_TRUE;
3530   bnew->free_ij      = PETSC_TRUE;
3531 
3532   /* condense columns of maij->B */
3533   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3534   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
3535   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
3536   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
3537   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3538   PetscFunctionReturn(PETSC_SUCCESS);
3539 }
3540 
3541 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3542 
3543 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
3544 {
3545   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
3546   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3547   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
3548   Mat             M, Msub, B = a->B;
3549   MatScalar      *aa;
3550   Mat_SeqAIJ     *aij;
3551   PetscInt       *garray = a->garray, *colsub, Ncols;
3552   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
3553   IS              iscol_sub, iscmap;
3554   const PetscInt *is_idx, *cmap;
3555   PetscBool       allcolumns = PETSC_FALSE;
3556   MPI_Comm        comm;
3557 
3558   PetscFunctionBegin;
3559   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3560   if (call == MAT_REUSE_MATRIX) {
3561     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3562     PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
3563     PetscCall(ISGetLocalSize(iscol_sub, &count));
3564 
3565     PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
3566     PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");
3567 
3568     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
3569     PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3570 
3571     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
3572 
3573   } else { /* call == MAT_INITIAL_MATRIX) */
3574     PetscBool flg;
3575 
3576     PetscCall(ISGetLocalSize(iscol, &n));
3577     PetscCall(ISGetSize(iscol, &Ncols));
3578 
3579     /* (1) iscol -> nonscalable iscol_local */
3580     /* Check for special case: each processor gets entire matrix columns */
3581     PetscCall(ISIdentity(iscol_local, &flg));
3582     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3583     PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3584     if (allcolumns) {
3585       iscol_sub = iscol_local;
3586       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3587       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
3588 
3589     } else {
3590       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3591       PetscInt *idx, *cmap1, k;
3592       PetscCall(PetscMalloc1(Ncols, &idx));
3593       PetscCall(PetscMalloc1(Ncols, &cmap1));
3594       PetscCall(ISGetIndices(iscol_local, &is_idx));
3595       count = 0;
3596       k     = 0;
3597       for (i = 0; i < Ncols; i++) {
3598         j = is_idx[i];
3599         if (j >= cstart && j < cend) {
3600           /* diagonal part of mat */
3601           idx[count]     = j;
3602           cmap1[count++] = i; /* column index in submat */
3603         } else if (Bn) {
3604           /* off-diagonal part of mat */
3605           if (j == garray[k]) {
3606             idx[count]     = j;
3607             cmap1[count++] = i; /* column index in submat */
3608           } else if (j > garray[k]) {
3609             while (j > garray[k] && k < Bn - 1) k++;
3610             if (j == garray[k]) {
3611               idx[count]     = j;
3612               cmap1[count++] = i; /* column index in submat */
3613             }
3614           }
3615         }
3616       }
3617       PetscCall(ISRestoreIndices(iscol_local, &is_idx));
3618 
3619       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
3620       PetscCall(ISGetBlockSize(iscol, &cbs));
3621       PetscCall(ISSetBlockSize(iscol_sub, cbs));
3622 
3623       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
3624     }
3625 
3626     /* (3) Create sequential Msub */
3627     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
3628   }
3629 
3630   PetscCall(ISGetLocalSize(iscol_sub, &count));
3631   aij = (Mat_SeqAIJ *)(Msub)->data;
3632   ii  = aij->i;
3633   PetscCall(ISGetIndices(iscmap, &cmap));
3634 
3635   /*
3636       m - number of local rows
3637       Ncols - number of columns (same on all processors)
3638       rstart - first row in new global matrix generated
3639   */
3640   PetscCall(MatGetSize(Msub, &m, NULL));
3641 
3642   if (call == MAT_INITIAL_MATRIX) {
3643     /* (4) Create parallel newmat */
3644     PetscMPIInt rank, size;
3645     PetscInt    csize;
3646 
3647     PetscCallMPI(MPI_Comm_size(comm, &size));
3648     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3649 
3650     /*
3651         Determine the number of non-zeros in the diagonal and off-diagonal
3652         portions of the matrix in order to do correct preallocation
3653     */
3654 
3655     /* first get start and end of "diagonal" columns */
3656     PetscCall(ISGetLocalSize(iscol, &csize));
3657     if (csize == PETSC_DECIDE) {
3658       PetscCall(ISGetSize(isrow, &mglobal));
3659       if (mglobal == Ncols) { /* square matrix */
3660         nlocal = m;
3661       } else {
3662         nlocal = Ncols / size + ((Ncols % size) > rank);
3663       }
3664     } else {
3665       nlocal = csize;
3666     }
3667     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3668     rstart = rend - nlocal;
3669     PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);
3670 
3671     /* next, compute all the lengths */
3672     jj = aij->j;
3673     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3674     olens = dlens + m;
3675     for (i = 0; i < m; i++) {
3676       jend = ii[i + 1] - ii[i];
3677       olen = 0;
3678       dlen = 0;
3679       for (j = 0; j < jend; j++) {
3680         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3681         else dlen++;
3682         jj++;
3683       }
3684       olens[i] = olen;
3685       dlens[i] = dlen;
3686     }
3687 
3688     PetscCall(ISGetBlockSize(isrow, &bs));
3689     PetscCall(ISGetBlockSize(iscol, &cbs));
3690 
3691     PetscCall(MatCreate(comm, &M));
3692     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
3693     PetscCall(MatSetBlockSizes(M, bs, cbs));
3694     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3695     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3696     PetscCall(PetscFree(dlens));
3697 
3698   } else { /* call == MAT_REUSE_MATRIX */
3699     M = *newmat;
3700     PetscCall(MatGetLocalSize(M, &i, NULL));
3701     PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3702     PetscCall(MatZeroEntries(M));
3703     /*
3704          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3705        rather than the slower MatSetValues().
3706     */
3707     M->was_assembled = PETSC_TRUE;
3708     M->assembled     = PETSC_FALSE;
3709   }
3710 
3711   /* (5) Set values of Msub to *newmat */
3712   PetscCall(PetscMalloc1(count, &colsub));
3713   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
3714 
3715   jj = aij->j;
3716   PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
3717   for (i = 0; i < m; i++) {
3718     row = rstart + i;
3719     nz  = ii[i + 1] - ii[i];
3720     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
3721     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
3722     jj += nz;
3723     aa += nz;
3724   }
3725   PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
3726   PetscCall(ISRestoreIndices(iscmap, &cmap));
3727 
3728   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3729   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3730 
3731   PetscCall(PetscFree(colsub));
3732 
3733   /* save Msub, iscol_sub and iscmap used in processor for next request */
3734   if (call == MAT_INITIAL_MATRIX) {
3735     *newmat = M;
3736     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub));
3737     PetscCall(MatDestroy(&Msub));
3738 
3739     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub));
3740     PetscCall(ISDestroy(&iscol_sub));
3741 
3742     PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap));
3743     PetscCall(ISDestroy(&iscmap));
3744 
3745     if (iscol_local) {
3746       PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3747       PetscCall(ISDestroy(&iscol_local));
3748     }
3749   }
3750   PetscFunctionReturn(PETSC_SUCCESS);
3751 }
3752 
3753 /*
3754     Not great since it makes two copies of the submatrix, first an SeqAIJ
3755   in local and then by concatenating the local matrices the end result.
3756   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3757 
3758   This requires a sequential iscol with all indices.
3759 */
3760 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
3761 {
3762   PetscMPIInt rank, size;
3763   PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
3764   PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3765   Mat         M, Mreuse;
3766   MatScalar  *aa, *vwork;
3767   MPI_Comm    comm;
3768   Mat_SeqAIJ *aij;
3769   PetscBool   colflag, allcolumns = PETSC_FALSE;
3770 
3771   PetscFunctionBegin;
3772   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3773   PetscCallMPI(MPI_Comm_rank(comm, &rank));
3774   PetscCallMPI(MPI_Comm_size(comm, &size));
3775 
3776   /* Check for special case: each processor gets entire matrix columns */
3777   PetscCall(ISIdentity(iscol, &colflag));
3778   PetscCall(ISGetLocalSize(iscol, &n));
3779   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3780   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3781 
3782   if (call == MAT_REUSE_MATRIX) {
3783     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
3784     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3785     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
3786   } else {
3787     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
3788   }
3789 
3790   /*
3791       m - number of local rows
3792       n - number of columns (same on all processors)
3793       rstart - first row in new global matrix generated
3794   */
3795   PetscCall(MatGetSize(Mreuse, &m, &n));
3796   PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
3797   if (call == MAT_INITIAL_MATRIX) {
3798     aij = (Mat_SeqAIJ *)(Mreuse)->data;
3799     ii  = aij->i;
3800     jj  = aij->j;
3801 
3802     /*
3803         Determine the number of non-zeros in the diagonal and off-diagonal
3804         portions of the matrix in order to do correct preallocation
3805     */
3806 
3807     /* first get start and end of "diagonal" columns */
3808     if (csize == PETSC_DECIDE) {
3809       PetscCall(ISGetSize(isrow, &mglobal));
3810       if (mglobal == n) { /* square matrix */
3811         nlocal = m;
3812       } else {
3813         nlocal = n / size + ((n % size) > rank);
3814       }
3815     } else {
3816       nlocal = csize;
3817     }
3818     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3819     rstart = rend - nlocal;
3820     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
3821 
3822     /* next, compute all the lengths */
3823     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3824     olens = dlens + m;
3825     for (i = 0; i < m; i++) {
3826       jend = ii[i + 1] - ii[i];
3827       olen = 0;
3828       dlen = 0;
3829       for (j = 0; j < jend; j++) {
3830         if (*jj < rstart || *jj >= rend) olen++;
3831         else dlen++;
3832         jj++;
3833       }
3834       olens[i] = olen;
3835       dlens[i] = dlen;
3836     }
3837     PetscCall(MatCreate(comm, &M));
3838     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
3839     PetscCall(MatSetBlockSizes(M, bs, cbs));
3840     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3841     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3842     PetscCall(PetscFree(dlens));
3843   } else {
3844     PetscInt ml, nl;
3845 
3846     M = *newmat;
3847     PetscCall(MatGetLocalSize(M, &ml, &nl));
3848     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3849     PetscCall(MatZeroEntries(M));
3850     /*
3851          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3852        rather than the slower MatSetValues().
3853     */
3854     M->was_assembled = PETSC_TRUE;
3855     M->assembled     = PETSC_FALSE;
3856   }
3857   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
3858   aij = (Mat_SeqAIJ *)(Mreuse)->data;
3859   ii  = aij->i;
3860   jj  = aij->j;
3861 
3862   /* trigger copy to CPU if needed */
3863   PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
3864   for (i = 0; i < m; i++) {
3865     row   = rstart + i;
3866     nz    = ii[i + 1] - ii[i];
3867     cwork = jj;
3868     jj    = PetscSafePointerPlusOffset(jj, nz);
3869     vwork = aa;
3870     aa    = PetscSafePointerPlusOffset(aa, nz);
3871     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
3872   }
3873   PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));
3874 
3875   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3876   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3877   *newmat = M;
3878 
3879   /* save submatrix used in processor for next request */
3880   if (call == MAT_INITIAL_MATRIX) {
3881     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
3882     PetscCall(MatDestroy(&Mreuse));
3883   }
3884   PetscFunctionReturn(PETSC_SUCCESS);
3885 }
3886 
3887 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3888 {
3889   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3890   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
3891   const PetscInt *JJ;
3892   PetscBool       nooffprocentries;
3893   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3894 
3895   PetscFunctionBegin;
3896   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);
3897 
3898   PetscCall(PetscLayoutSetUp(B->rmap));
3899   PetscCall(PetscLayoutSetUp(B->cmap));
3900   m      = B->rmap->n;
3901   cstart = B->cmap->rstart;
3902   cend   = B->cmap->rend;
3903   rstart = B->rmap->rstart;
3904 
3905   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3906 
3907   if (PetscDefined(USE_DEBUG)) {
3908     for (i = 0; i < m; i++) {
3909       nnz = Ii[i + 1] - Ii[i];
3910       JJ  = PetscSafePointerPlusOffset(J, Ii[i]);
3911       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3912       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3913       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3914     }
3915   }
3916 
3917   for (i = 0; i < m; i++) {
3918     nnz     = Ii[i + 1] - Ii[i];
3919     JJ      = PetscSafePointerPlusOffset(J, Ii[i]);
3920     nnz_max = PetscMax(nnz_max, nnz);
3921     d       = 0;
3922     for (j = 0; j < nnz; j++) {
3923       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3924     }
3925     d_nnz[i] = d;
3926     o_nnz[i] = nnz - d;
3927   }
3928   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3929   PetscCall(PetscFree2(d_nnz, o_nnz));
3930 
3931   for (i = 0; i < m; i++) {
3932     ii = i + rstart;
3933     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i]), PetscSafePointerPlusOffset(v, Ii[i]), INSERT_VALUES));
3934   }
3935   nooffprocentries    = B->nooffprocentries;
3936   B->nooffprocentries = PETSC_TRUE;
3937   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3938   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3939   B->nooffprocentries = nooffprocentries;
3940 
3941   /* count number of entries below block diagonal */
3942   PetscCall(PetscFree(Aij->ld));
3943   PetscCall(PetscCalloc1(m, &ld));
3944   Aij->ld = ld;
3945   for (i = 0; i < m; i++) {
3946     nnz = Ii[i + 1] - Ii[i];
3947     j   = 0;
3948     while (j < nnz && J[j] < cstart) j++;
3949     ld[i] = j;
3950     if (J) J += nnz;
3951   }
3952 
3953   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3954   PetscFunctionReturn(PETSC_SUCCESS);
3955 }
3956 
3957 /*@
3958   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3959   (the default parallel PETSc format).
3960 
3961   Collective
3962 
3963   Input Parameters:
3964 + B - the matrix
3965 . i - the indices into `j` for the start of each local row (indices start with zero)
3966 . j - the column indices for each local row (indices start with zero)
3967 - v - optional values in the matrix
3968 
3969   Level: developer
3970 
3971   Notes:
3972   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
3973   thus you CANNOT change the matrix entries by changing the values of `v` after you have
3974   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3975 
3976   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
3977 
3978   A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.
3979 
3980   You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.
3981 
3982   If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
3983   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
3984 
3985   The format which is used for the sparse matrix input, is equivalent to a
3986   row-major ordering.. i.e for the following matrix, the input data expected is
3987   as shown
3988 .vb
3989         1 0 0
3990         2 0 3     P0
3991        -------
3992         4 5 6     P1
3993 
3994      Process0 [P0] rows_owned=[0,1]
3995         i =  {0,1,3}  [size = nrow+1  = 2+1]
3996         j =  {0,0,2}  [size = 3]
3997         v =  {1,2,3}  [size = 3]
3998 
3999      Process1 [P1] rows_owned=[2]
4000         i =  {0,3}    [size = nrow+1  = 1+1]
4001         j =  {0,1,2}  [size = 3]
4002         v =  {4,5,6}  [size = 3]
4003 .ve
4004 
4005 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
4006           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4007 @*/
4008 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
4009 {
4010   PetscFunctionBegin;
4011   PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
4012   PetscFunctionReturn(PETSC_SUCCESS);
4013 }
4014 
4015 /*@
4016   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
4017   (the default parallel PETSc format).  For good matrix assembly performance
4018   the user should preallocate the matrix storage by setting the parameters
4019   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4020 
4021   Collective
4022 
4023   Input Parameters:
4024 + B     - the matrix
4025 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4026            (same value is used for all local rows)
4027 . d_nnz - array containing the number of nonzeros in the various rows of the
4028            DIAGONAL portion of the local submatrix (possibly different for each row)
4029            or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4030            The size of this array is equal to the number of local rows, i.e 'm'.
4031            For matrices that will be factored, you must leave room for (and set)
4032            the diagonal entry even if it is zero.
4033 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4034            submatrix (same value is used for all local rows).
4035 - o_nnz - array containing the number of nonzeros in the various rows of the
4036            OFF-DIAGONAL portion of the local submatrix (possibly different for
4037            each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4038            structure. The size of this array is equal to the number
4039            of local rows, i.e 'm'.
4040 
4041   Example Usage:
4042   Consider the following 8x8 matrix with 34 non-zero values, that is
4043   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4044   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4045   as follows
4046 
4047 .vb
4048             1  2  0  |  0  3  0  |  0  4
4049     Proc0   0  5  6  |  7  0  0  |  8  0
4050             9  0 10  | 11  0  0  | 12  0
4051     -------------------------------------
4052            13  0 14  | 15 16 17  |  0  0
4053     Proc1   0 18  0  | 19 20 21  |  0  0
4054             0  0  0  | 22 23  0  | 24  0
4055     -------------------------------------
4056     Proc2  25 26 27  |  0  0 28  | 29  0
4057            30  0  0  | 31 32 33  |  0 34
4058 .ve
4059 
4060   This can be represented as a collection of submatrices as
4061 .vb
4062       A B C
4063       D E F
4064       G H I
4065 .ve
4066 
4067   Where the submatrices A,B,C are owned by proc0, D,E,F are
4068   owned by proc1, G,H,I are owned by proc2.
4069 
4070   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4071   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4072   The 'M','N' parameters are 8,8, and have the same values on all procs.
4073 
4074   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4075   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4076   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4077   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4078   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4079   matrix, and [DF] as another `MATSEQAIJ` matrix.
4080 
4081   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4082   allocated for every row of the local diagonal submatrix, and `o_nz`
4083   storage locations are allocated for every row of the OFF-DIAGONAL submat.
4084   One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
4085   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4086   In this case, the values of `d_nz`, `o_nz` are
4087 .vb
4088      proc0  d_nz = 2, o_nz = 2
4089      proc1  d_nz = 3, o_nz = 2
4090      proc2  d_nz = 1, o_nz = 4
4091 .ve
4092   We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
4093   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4094   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4095   34 values.
4096 
4097   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4098   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4099   In the above case the values for `d_nnz`, `o_nnz` are
4100 .vb
4101      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4102      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4103      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4104 .ve
4105   Here the space allocated is sum of all the above values i.e 34, and
4106   hence pre-allocation is perfect.
4107 
4108   Level: intermediate
4109 
4110   Notes:
4111   If the *_nnz parameter is given then the *_nz parameter is ignored
4112 
4113   The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4114   storage.  The stored row and column indices begin with zero.
4115   See [Sparse Matrices](sec_matsparse) for details.
4116 
4117   The parallel matrix is partitioned such that the first m0 rows belong to
4118   process 0, the next m1 rows belong to process 1, the next m2 rows belong
4119   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4120 
4121   The DIAGONAL portion of the local submatrix of a processor can be defined
4122   as the submatrix which is obtained by extracting the part corresponding to
4123   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4124   first row that belongs to the processor, r2 is the last row belonging to
4125   this processor, and c1-c2 is the range of indices of the local part of a
4126   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4127   common case of a square matrix, the row and column ranges are the same and
4128   the DIAGONAL part is also square. The remaining portion of the local
4129   submatrix (mxN) constitute the OFF-DIAGONAL portion.
4130 
4131   If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4132 
4133   You can call `MatGetInfo()` to get information on how effective the preallocation was;
4134   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4135   You can also run with the option `-info` and look for messages with the string
4136   malloc in them to see if additional memory allocation was needed.
4137 
4138 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4139           `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4140 @*/
4141 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
4142 {
4143   PetscFunctionBegin;
4144   PetscValidHeaderSpecific(B, MAT_CLASSID, 1); /* argument 1 must be a Mat object */
4145   PetscValidType(B, 1); /* NOTE(review): presumably requires that the matrix type has already been set - confirm */
4146   PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz)); /* forwards all arguments to the function composed with B under this name; presumably does nothing if none is composed - confirm */
4147   PetscFunctionReturn(PETSC_SUCCESS);
4148 }
4149 
4150 /*@
4151   MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard
4152   CSR format for the local rows.
4153 
4154   Collective
4155 
4156   Input Parameters:
4157 + comm - MPI communicator
4158 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
4159 . n    - This value should be the same as the local size used in creating the
4160          x vector for the matrix-vector product $ y = Ax$. (or `PETSC_DECIDE` to have
4161          calculated if `N` is given) For square matrices n is almost always `m`.
4162 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
4163 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
4164 . i    - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4165 . j    - global column indices
4166 - a    - optional matrix values
4167 
4168   Output Parameter:
4169 . mat - the matrix
4170 
4171   Level: intermediate
4172 
4173   Notes:
4174   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4175   thus you CANNOT change the matrix entries by changing the values of `a[]` after you have
4176   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4177 
4178   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
4179 
4180   Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()`
4181 
4182   If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use
4183   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
4184 
4185   The format which is used for the sparse matrix input, is equivalent to a
4186   row-major ordering, i.e., for the following matrix, the input data expected is
4187   as shown
4188 .vb
4189         1 0 0
4190         2 0 3     P0
4191        -------
4192         4 5 6     P1
4193 
4194      Process0 [P0] rows_owned=[0,1]
4195         i =  {0,1,3}  [size = nrow+1  = 2+1]
4196         j =  {0,0,2}  [size = 3]
4197         v =  {1,2,3}  [size = 3]
4198 
4199      Process1 [P1] rows_owned=[2]
4200         i =  {0,3}    [size = nrow+1  = 1+1]
4201         j =  {0,1,2}  [size = 3]
4202         v =  {4,5,6}  [size = 3]
4203 .ve
4204 
4205 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4206           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4207 @*/
4208 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
4209 {
4210   PetscFunctionBegin;
4211   PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4212   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4213   PetscCall(MatCreate(comm, mat));
4214   PetscCall(MatSetSizes(*mat, m, n, M, N));
4215   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4216   PetscCall(MatSetType(*mat, MATMPIAIJ));
4217   PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
4218   PetscFunctionReturn(PETSC_SUCCESS);
4219 }
4220 
4221 /*@
4222   MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
4223   CSR format. Only the numerical values are updated; the other arrays must be identical to what was passed
4224   to `MatCreateMPIAIJWithArrays()`
4225 
4226   Deprecated: Use `MatUpdateMPIAIJWithArray()`
4227 
4228   Collective
4229 
4230   Input Parameters:
4231 + mat - the matrix
4232 . m   - number of local rows (Cannot be `PETSC_DECIDE`)
4233 . n   - This value should be the same as the local size used in creating the
4234        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4235        calculated if N is given) For square matrices n is almost always m.
4236 . M   - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4237 . N   - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4238 . Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4239 . J   - column indices
4240 - v   - matrix values
4241 
4242   Level: deprecated
4243 
4244 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4245           `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4246 @*/
4247 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
4248 {
4249   PetscInt        nnz, i;
4250   PetscBool       nooffprocentries;
4251   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4252   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4253   PetscScalar    *ad, *ao;
4254   PetscInt        ldi, Iii, md;
4255   const PetscInt *Adi = Ad->i;
4256   PetscInt       *ld  = Aij->ld;
4257 
4258   PetscFunctionBegin;
4259   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4260   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4261   PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4262   PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4263 
4264   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4265   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4266 
4267   for (i = 0; i < m; i++) {
4268     if (PetscDefined(USE_DEBUG)) {
4269       for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) {
4270         PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i);
4271         PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i);
4272       }
4273     }
4274     nnz = Ii[i + 1] - Ii[i];
4275     Iii = Ii[i];
4276     ldi = ld[i];
4277     md  = Adi[i + 1] - Adi[i];
4278     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4279     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4280     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4281     ad += md;
4282     ao += nnz - md;
4283   }
4284   nooffprocentries      = mat->nooffprocentries;
4285   mat->nooffprocentries = PETSC_TRUE;
4286   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4287   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4288   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4289   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4290   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4291   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4292   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4293   mat->nooffprocentries = nooffprocentries;
4294   PetscFunctionReturn(PETSC_SUCCESS);
4295 }
4296 
4297 /*@
4298   MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values
4299 
4300   Collective
4301 
4302   Input Parameters:
4303 + mat - the matrix
4304 - v   - matrix values, stored by row
4305 
4306   Level: intermediate
4307 
4308   Notes:
4309   The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4310 
4311   The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly
4312 
4313 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4314           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4315 @*/
4316 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4317 {
4318   PetscInt        nnz, i, m;
4319   PetscBool       nooffprocentries;
4320   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4321   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4322   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4323   PetscScalar    *ad, *ao;
4324   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4325   PetscInt        ldi, Iii, md;
4326   PetscInt       *ld = Aij->ld;
4327 
4328   PetscFunctionBegin;
4329   m = mat->rmap->n;
4330 
4331   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4332   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4333   Iii = 0;
4334   for (i = 0; i < m; i++) {
4335     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4336     ldi = ld[i];
4337     md  = Adi[i + 1] - Adi[i];
4338     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4339     ad += md;
4340     if (ao) {
4341       PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4342       PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4343       ao += nnz - md;
4344     }
4345     Iii += nnz;
4346   }
4347   nooffprocentries      = mat->nooffprocentries;
4348   mat->nooffprocentries = PETSC_TRUE;
4349   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4350   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4351   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4352   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4353   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4354   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4355   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4356   mat->nooffprocentries = nooffprocentries;
4357   PetscFunctionReturn(PETSC_SUCCESS);
4358 }
4359 
4360 /*@
4361   MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4362   (the default parallel PETSc format).  For good matrix assembly performance
4363   the user should preallocate the matrix storage by setting the parameters
4364   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4365 
4366   Collective
4367 
4368   Input Parameters:
4369 + comm  - MPI communicator
4370 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4371           This value should be the same as the local size used in creating the
4372           y vector for the matrix-vector product y = Ax.
4373 . n     - This value should be the same as the local size used in creating the
4374           x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4375           calculated if N is given) For square matrices n is almost always m.
4376 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4377 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4378 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4379           (same value is used for all local rows)
4380 . d_nnz - array containing the number of nonzeros in the various rows of the
4381           DIAGONAL portion of the local submatrix (possibly different for each row)
4382           or `NULL`, if `d_nz` is used to specify the nonzero structure.
4383           The size of this array is equal to the number of local rows, i.e 'm'.
4384 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4385           submatrix (same value is used for all local rows).
4386 - o_nnz - array containing the number of nonzeros in the various rows of the
4387           OFF-DIAGONAL portion of the local submatrix (possibly different for
4388           each row) or `NULL`, if `o_nz` is used to specify the nonzero
4389           structure. The size of this array is equal to the number
4390           of local rows, i.e 'm'.
4391 
4392   Output Parameter:
4393 . A - the matrix
4394 
4395   Options Database Keys:
4396 + -mat_no_inode                     - Do not use inodes
4397 . -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
4398 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4399                                       See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter`
4400                                       to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4401 
4402   Level: intermediate
4403 
4404   Notes:
4405   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4406   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4407   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4408 
4409   If the *_nnz parameter is given then the *_nz parameter is ignored
4410 
4411   The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4412   processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4413   storage requirements for this matrix.
4414 
4415   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
4416   processor then it must be used on all processors that share the object for
4417   that argument.
4418 
4419   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
4420   `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
4421 
4422   The user MUST specify either the local or global matrix dimensions
4423   (possibly both).
4424 
4425   The parallel matrix is partitioned across processors such that the
4426   first `m0` rows belong to process 0, the next `m1` rows belong to
4427   process 1, the next `m2` rows belong to process 2, etc., where
4428   `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores
4429   values corresponding to [m x N] submatrix.
4430 
4431   The columns are logically partitioned with the n0 columns belonging
4432   to 0th partition, the next n1 columns belonging to the next
4433   partition etc.. where n0,n1,n2... are the input parameter 'n'.
4434 
4435   The DIAGONAL portion of the local submatrix on any given processor
4436   is the submatrix corresponding to the rows and columns m,n
4437   corresponding to the given processor. i.e diagonal matrix on
4438   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4439   etc. The remaining portion of the local submatrix [m x (N-n)]
4440   constitute the OFF-DIAGONAL portion. The example below better
4441   illustrates this concept.
4442 
4443   For a square global matrix we define each processor's diagonal portion
4444   to be its local rows and the corresponding columns (a square submatrix);
4445   each processor's off-diagonal portion encompasses the remainder of the
4446   local matrix (a rectangular submatrix).
4447 
4448   If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
4449 
4450   When calling this routine with a single process communicator, a matrix of
4451   type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4452   type of communicator, use the construction mechanism
4453 .vb
4454   MatCreate(..., &A);
4455   MatSetType(A, MATMPIAIJ);
4456   MatSetSizes(A, m, n, M, N);
4457   MatMPIAIJSetPreallocation(A, ...);
4458 .ve
4459 
4460   By default, this format uses inodes (identical nodes) when possible.
4461   We search for consecutive rows with the same nonzero structure, thereby
4462   reusing matrix information to achieve increased efficiency.
4463 
4464   Example Usage:
4465   Consider the following 8x8 matrix with 34 non-zero values, that is
4466   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4467   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4468   as follows
4469 
4470 .vb
4471             1  2  0  |  0  3  0  |  0  4
4472     Proc0   0  5  6  |  7  0  0  |  8  0
4473             9  0 10  | 11  0  0  | 12  0
4474     -------------------------------------
4475            13  0 14  | 15 16 17  |  0  0
4476     Proc1   0 18  0  | 19 20 21  |  0  0
4477             0  0  0  | 22 23  0  | 24  0
4478     -------------------------------------
4479     Proc2  25 26 27  |  0  0 28  | 29  0
4480            30  0  0  | 31 32 33  |  0 34
4481 .ve
4482 
4483   This can be represented as a collection of submatrices as
4484 
4485 .vb
4486       A B C
4487       D E F
4488       G H I
4489 .ve
4490 
4491   Where the submatrices A,B,C are owned by proc0, D,E,F are
4492   owned by proc1, G,H,I are owned by proc2.
4493 
4494   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4495   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4496   The 'M','N' parameters are 8,8, and have the same values on all procs.
4497 
4498   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4499   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4500   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4501   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4502   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4503   matrix, and [DF] as another `MATSEQAIJ` matrix.
4504 
4505   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4506   allocated for every row of the local diagonal submatrix, and `o_nz`
4507   storage locations are allocated for every row of the OFF-DIAGONAL submat.
4508   One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
4509   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4510   In this case, the values of `d_nz`,`o_nz` are
4511 .vb
4512      proc0  d_nz = 2, o_nz = 2
4513      proc1  d_nz = 3, o_nz = 2
4514      proc2  d_nz = 1, o_nz = 4
4515 .ve
4516   We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
4517   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4518   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4519   34 values.
4520 
4521   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4522   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4523   In the above case the values for d_nnz,o_nnz are
4524 .vb
4525      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4526      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4527      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4528 .ve
4529   Here the space allocated is sum of all the above values i.e 34, and
4530   hence pre-allocation is perfect.
4531 
4532 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4533           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`,
4534           `MatGetOwnershipRangesColumn()`, `PetscLayout`
4535 @*/
4536 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4537 {
4538   PetscMPIInt size;
4539 
4540   PetscFunctionBegin;
4541   PetscCall(MatCreate(comm, A));
4542   PetscCall(MatSetSizes(*A, m, n, M, N));
4543   PetscCallMPI(MPI_Comm_size(comm, &size));
4544   if (size > 1) {
4545     PetscCall(MatSetType(*A, MATMPIAIJ));
4546     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4547   } else {
4548     PetscCall(MatSetType(*A, MATSEQAIJ));
4549     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4550   }
4551   PetscFunctionReturn(PETSC_SUCCESS);
4552 }
4553 
4554 /*MC
4555     MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix
4556 
4557     Synopsis:
4558     MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4559 
4560     Not Collective
4561 
4562     Input Parameter:
4563 .   A - the `MATMPIAIJ` matrix
4564 
4565     Output Parameters:
4566 +   Ad - the diagonal portion of the matrix
4567 .   Ao - the off-diagonal portion of the matrix
4568 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4569 -   ierr - error code
4570 
4571      Level: advanced
4572 
4573     Note:
4574     Use  `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4575 
4576 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
4577 M*/
4578 
4579 /*MC
4580     MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4581 
4582     Synopsis:
4583     MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4584 
4585     Not Collective
4586 
4587     Input Parameters:
4588 +   A - the `MATMPIAIJ` matrix
4589 .   Ad - the diagonal portion of the matrix
4590 .   Ao - the off-diagonal portion of the matrix
4591 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4592 -   ierr - error code
4593 
4594      Level: advanced
4595 
4596 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
4597 M*/
4598 
4599 /*@C
4600   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4601 
4602   Not Collective
4603 
4604   Input Parameter:
4605 . A - The `MATMPIAIJ` matrix
4606 
4607   Output Parameters:
4608 + Ad     - The local diagonal block as a `MATSEQAIJ` matrix
4609 . Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
4610 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4611 
4612   Level: intermediate
4613 
4614   Note:
4615   The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
4616   in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns of `Ao` are in [0, Nco), where Nco is
4617   the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
4618   local column numbers to global column numbers in the original matrix.
4619 
4620   Fortran Notes:
4621   `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()`
4622 
4623 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
4624 @*/
4625 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4626 {
4627   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4628   PetscBool   flg;
4629 
4630   PetscFunctionBegin;
4631   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4632   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4633   if (Ad) *Ad = a->A;
4634   if (Ao) *Ao = a->B;
4635   if (colmap) *colmap = a->garray;
4636   PetscFunctionReturn(PETSC_SUCCESS);
4637 }
4638 
4639 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
4640 {
4641   PetscInt     m, N, i, rstart, nnz, Ii;
4642   PetscInt    *indx;
4643   PetscScalar *values;
4644   MatType      rootType;
4645 
4646   PetscFunctionBegin;
4647   PetscCall(MatGetSize(inmat, &m, &N));
4648   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4649     PetscInt *dnz, *onz, sum, bs, cbs;
4650 
4651     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
4652     /* Check sum(n) = N */
4653     PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
4654     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
4655 
4656     PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
4657     rstart -= m;
4658 
4659     MatPreallocateBegin(comm, m, n, dnz, onz);
4660     for (i = 0; i < m; i++) {
4661       PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4662       PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
4663       PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4664     }
4665 
4666     PetscCall(MatCreate(comm, outmat));
4667     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
4668     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
4669     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
4670     PetscCall(MatGetRootType_Private(inmat, &rootType));
4671     PetscCall(MatSetType(*outmat, rootType));
4672     PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
4673     PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
4674     MatPreallocateEnd(dnz, onz);
4675     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
4676   }
4677 
4678   /* numeric phase */
4679   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
4680   for (i = 0; i < m; i++) {
4681     PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4682     Ii = i + rstart;
4683     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
4684     PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4685   }
4686   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
4687   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
4688   PetscFunctionReturn(PETSC_SUCCESS);
4689 }
4690 
4691 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4692 {
4693   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4694 
4695   PetscFunctionBegin;
4696   if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
4697   PetscCall(PetscFree(merge->id_r));
4698   PetscCall(PetscFree(merge->len_s));
4699   PetscCall(PetscFree(merge->len_r));
4700   PetscCall(PetscFree(merge->bi));
4701   PetscCall(PetscFree(merge->bj));
4702   PetscCall(PetscFree(merge->buf_ri[0]));
4703   PetscCall(PetscFree(merge->buf_ri));
4704   PetscCall(PetscFree(merge->buf_rj[0]));
4705   PetscCall(PetscFree(merge->buf_rj));
4706   PetscCall(PetscFree(merge->coi));
4707   PetscCall(PetscFree(merge->coj));
4708   PetscCall(PetscFree(merge->owners_co));
4709   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4710   PetscCall(PetscFree(merge));
4711   PetscFunctionReturn(PETSC_SUCCESS);
4712 }
4713 
4714 #include <../src/mat/utils/freespace.h>
4715 #include <petscbt.h>
4716 
/*
  MatCreateMPIAIJSumSeqAIJNumeric - numeric phase that fills mpimat with values taken from seqmat.

  mpimat must carry the "MatMergeSeqsToMPI" container (the error raised below names
  MatCreateMPIAIJSumSeqAIJSymbolic as its creator). Using the row ownership, message lengths and
  row structure stored in that container, each rank
    - sends to every other rank the seqmat values of the rows that rank owns,
    - receives the values other ranks hold for its own rows,
    - adds its own and the received values into a one-row buffer, and
    - inserts that row into mpimat, which is assembled at the end.

  Input Parameters:
    seqmat - the matrix on this rank whose values are contributed (accessed as a Mat_SeqAIJ)
    mpimat - the parallel matrix to fill; all messages use its communicator
*/
4717 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
4718 {
4719   MPI_Comm             comm;
4720   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4721   PetscMPIInt          size, rank, taga, *len_s;
4722   PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj;
4723   PetscInt             proc, m;
4724   PetscInt           **buf_ri, **buf_rj;
4725   PetscInt             k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
4726   PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
4727   MPI_Request         *s_waits, *r_waits;
4728   MPI_Status          *status;
4729   const MatScalar     *aa, *a_a;
4730   MatScalar          **abuf_r, *ba_i;
4731   Mat_Merge_SeqsToMPI *merge;
4732   PetscContainer       container;
4733 
4734   PetscFunctionBegin;
4735   PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
4736   PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));
4737 
4738   PetscCallMPI(MPI_Comm_size(comm, &size));
4739   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4740 
       /* fetch the merge context attached to mpimat; without it this routine cannot proceed */
4741   PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
4742   PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4743   PetscCall(PetscContainerGetPointer(container, (void **)&merge));
       /* a_a keeps the base of seqmat's value array (needed for the Restore call); aa is repositioned per row later */
4744   PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
4745   aa = a_a;
4746 
       /* row offsets (bi) and column indices (bj) of the local rows of mpimat, and the received i/j structure */
4747   bi     = merge->bi;
4748   bj     = merge->bj;
4749   buf_ri = merge->buf_ri;
4750   buf_rj = merge->buf_rj;
4751 
       /* one status array of length size, shared by both MPI_Waitall calls below */
4752   PetscCall(PetscMalloc1(size, &status));
4753   owners = merge->rowmap->range;
4754   len_s  = merge->len_s;
4755 
4756   /* send and recv matrix values */
4757   PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
4758   PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));
4759 
       /* one send per rank with a non-zero length: len_s[proc] values starting at the first row owned by proc */
4760   PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
4761   for (proc = 0, k = 0; proc < size; proc++) {
4762     if (!len_s[proc]) continue;
4763     i = owners[proc];
4764     PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
4765     k++;
4766   }
4767 
4768   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
4769   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
4770   PetscCall(PetscFree(status));
4771 
4772   PetscCall(PetscFree(s_waits));
4773   PetscCall(PetscFree(r_waits));
4774 
4775   /* insert mat values of mpimat */
       /* ba_i is the one-row accumulation buffer, sized by the global column count of mpimat */
4776   PetscCall(PetscMalloc1(N, &ba_i));
4777   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4778 
       /* each received i-structure is read as: [0] = number of rows, then the row indices, then the row offsets */
4779   for (k = 0; k < merge->nrecv; k++) {
4780     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4781     nrows       = *buf_ri_k[k];
4782     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4783     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4784   }
4785 
4786   /* set values of ba */
4787   m = merge->rowmap->n;
4788   for (i = 0; i < m; i++) {
         /* arow is the row of seqmat (offset by this rank's first owned row) that matches local row i */
4789     arow = owners[rank] + i;
4790     bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
4791     bnzi = bi[i + 1] - bi[i];
4792     PetscCall(PetscArrayzero(ba_i, bnzi));
4793 
4794     /* add local non-zero vals of this proc's seqmat into ba */
4795     anzi   = ai[arow + 1] - ai[arow];
4796     aj     = a->j + ai[arow];
4797     aa     = a_a + ai[arow];
4798     nextaj = 0;
         /* walk the columns of the mpimat row and consume a seqmat entry on each match; the loop only ends once all
            anzi entries are consumed, so it relies on bj_i containing every column of aj in the same order */
4799     for (j = 0; nextaj < anzi; j++) {
4800       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4801         ba_i[j] += aa[nextaj++];
4802       }
4803     }
4804 
4805     /* add received vals into ba */
4806     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4807       /* i-th row */
           /* message k contributes to this row only if its next pending row index equals i */
4808       if (i == *nextrow[k]) {
4809         anzi   = *(nextai[k] + 1) - *nextai[k];
4810         aj     = buf_rj[k] + *nextai[k];
4811         aa     = abuf_r[k] + *nextai[k];
4812         nextaj = 0;
4813         for (j = 0; nextaj < anzi; j++) {
4814           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4815             ba_i[j] += aa[nextaj++];
4816           }
4817         }
             /* advance message k to its next row */
4818         nextrow[k]++;
4819         nextai[k]++;
4820       }
4821     }
         /* the summed row replaces whatever mpimat held at these locations */
4822     PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
4823   }
4824   PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
4825   PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
4826   PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));
4827 
       /* release the receive buffers and scratch arrays allocated in this routine */
4828   PetscCall(PetscFree(abuf_r[0]));
4829   PetscCall(PetscFree(abuf_r));
4830   PetscCall(PetscFree(ba_i));
4831   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
4832   PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
4833   PetscFunctionReturn(PETSC_SUCCESS);
4834 }
4835 
4836 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4837 {
4838   Mat                  B_mpi;
4839   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4840   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4841   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4842   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4843   PetscInt             len, proc, *dnz, *onz, bs, cbs;
4844   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4845   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4846   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4847   MPI_Status          *status;
4848   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4849   PetscBT              lnkbt;
4850   Mat_Merge_SeqsToMPI *merge;
4851   PetscContainer       container;
4852 
4853   PetscFunctionBegin;
4854   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4855 
4856   /* make sure it is a PETSc comm */
4857   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4858   PetscCallMPI(MPI_Comm_size(comm, &size));
4859   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4860 
4861   PetscCall(PetscNew(&merge));
4862   PetscCall(PetscMalloc1(size, &status));
4863 
4864   /* determine row ownership */
4865   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4866   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4867   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4868   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4869   PetscCall(PetscLayoutSetUp(merge->rowmap));
4870   PetscCall(PetscMalloc1(size, &len_si));
4871   PetscCall(PetscMalloc1(size, &merge->len_s));
4872 
4873   m      = merge->rowmap->n;
4874   owners = merge->rowmap->range;
4875 
4876   /* determine the number of messages to send, their lengths */
4877   len_s = merge->len_s;
4878 
4879   len          = 0; /* length of buf_si[] */
4880   merge->nsend = 0;
4881   for (proc = 0; proc < size; proc++) {
4882     len_si[proc] = 0;
4883     if (proc == rank) {
4884       len_s[proc] = 0;
4885     } else {
4886       len_si[proc] = owners[proc + 1] - owners[proc] + 1;
4887       len_s[proc]  = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4888     }
4889     if (len_s[proc]) {
4890       merge->nsend++;
4891       nrows = 0;
4892       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4893         if (ai[i + 1] > ai[i]) nrows++;
4894       }
4895       len_si[proc] = 2 * (nrows + 1);
4896       len += len_si[proc];
4897     }
4898   }
4899 
4900   /* determine the number and length of messages to receive for ij-structure */
4901   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4902   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4903 
4904   /* post the Irecv of j-structure */
4905   PetscCall(PetscCommGetNewTag(comm, &tagj));
4906   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4907 
4908   /* post the Isend of j-structure */
4909   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4910 
4911   for (proc = 0, k = 0; proc < size; proc++) {
4912     if (!len_s[proc]) continue;
4913     i = owners[proc];
4914     PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4915     k++;
4916   }
4917 
4918   /* receives and sends of j-structure are complete */
4919   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4920   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4921 
4922   /* send and recv i-structure */
4923   PetscCall(PetscCommGetNewTag(comm, &tagi));
4924   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4925 
4926   PetscCall(PetscMalloc1(len + 1, &buf_s));
4927   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4928   for (proc = 0, k = 0; proc < size; proc++) {
4929     if (!len_s[proc]) continue;
4930     /* form outgoing message for i-structure:
4931          buf_si[0]:                 nrows to be sent
4932                [1:nrows]:           row index (global)
4933                [nrows+1:2*nrows+1]: i-structure index
4934     */
4935     nrows       = len_si[proc] / 2 - 1;
4936     buf_si_i    = buf_si + nrows + 1;
4937     buf_si[0]   = nrows;
4938     buf_si_i[0] = 0;
4939     nrows       = 0;
4940     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4941       anzi = ai[i + 1] - ai[i];
4942       if (anzi) {
4943         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4944         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4945         nrows++;
4946       }
4947     }
4948     PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4949     k++;
4950     buf_si += len_si[proc];
4951   }
4952 
4953   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4954   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4955 
4956   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4957   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4958 
4959   PetscCall(PetscFree(len_si));
4960   PetscCall(PetscFree(len_ri));
4961   PetscCall(PetscFree(rj_waits));
4962   PetscCall(PetscFree2(si_waits, sj_waits));
4963   PetscCall(PetscFree(ri_waits));
4964   PetscCall(PetscFree(buf_s));
4965   PetscCall(PetscFree(status));
4966 
4967   /* compute a local seq matrix in each processor */
4968   /* allocate bi array and free space for accumulating nonzero column info */
4969   PetscCall(PetscMalloc1(m + 1, &bi));
4970   bi[0] = 0;
4971 
4972   /* create and initialize a linked list */
4973   nlnk = N + 1;
4974   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
4975 
4976   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4977   len = ai[owners[rank + 1]] - ai[owners[rank]];
4978   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
4979 
4980   current_space = free_space;
4981 
4982   /* determine symbolic info for each local row */
4983   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4984 
4985   for (k = 0; k < merge->nrecv; k++) {
4986     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4987     nrows       = *buf_ri_k[k];
4988     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4989     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4990   }
4991 
4992   MatPreallocateBegin(comm, m, n, dnz, onz);
4993   len = 0;
4994   for (i = 0; i < m; i++) {
4995     bnzi = 0;
4996     /* add local non-zero cols of this proc's seqmat into lnk */
4997     arow = owners[rank] + i;
4998     anzi = ai[arow + 1] - ai[arow];
4999     aj   = a->j + ai[arow];
5000     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5001     bnzi += nlnk;
5002     /* add received col data into lnk */
5003     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
5004       if (i == *nextrow[k]) {            /* i-th row */
5005         anzi = *(nextai[k] + 1) - *nextai[k];
5006         aj   = buf_rj[k] + *nextai[k];
5007         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5008         bnzi += nlnk;
5009         nextrow[k]++;
5010         nextai[k]++;
5011       }
5012     }
5013     if (len < bnzi) len = bnzi; /* =max(bnzi) */
5014 
5015     /* if free space is not available, make more free space */
5016     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
5017     /* copy data into free space, then initialize lnk */
5018     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
5019     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
5020 
5021     current_space->array += bnzi;
5022     current_space->local_used += bnzi;
5023     current_space->local_remaining -= bnzi;
5024 
5025     bi[i + 1] = bi[i] + bnzi;
5026   }
5027 
5028   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
5029 
5030   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
5031   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
5032   PetscCall(PetscLLDestroy(lnk, lnkbt));
5033 
5034   /* create symbolic parallel matrix B_mpi */
5035   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
5036   PetscCall(MatCreate(comm, &B_mpi));
5037   if (n == PETSC_DECIDE) {
5038     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
5039   } else {
5040     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
5041   }
5042   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
5043   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
5044   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5045   MatPreallocateEnd(dnz, onz);
5046   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
5047 
5048   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5049   B_mpi->assembled = PETSC_FALSE;
5050   merge->bi        = bi;
5051   merge->bj        = bj;
5052   merge->buf_ri    = buf_ri;
5053   merge->buf_rj    = buf_rj;
5054   merge->coi       = NULL;
5055   merge->coj       = NULL;
5056   merge->owners_co = NULL;
5057 
5058   PetscCall(PetscCommDestroy(&comm));
5059 
5060   /* attach the supporting struct to B_mpi for reuse */
5061   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5062   PetscCall(PetscContainerSetPointer(container, merge));
5063   PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5064   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5065   PetscCall(PetscContainerDestroy(&container));
5066   *mpimat = B_mpi;
5067 
5068   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5069   PetscFunctionReturn(PETSC_SUCCESS);
5070 }
5071 
5072 /*@
5073   MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5074   matrices from each processor
5075 
5076   Collective
5077 
5078   Input Parameters:
5079 + comm   - the communicator the parallel matrix will live on
5080 . seqmat - the input sequential matrices
5081 . m      - number of local rows (or `PETSC_DECIDE`)
5082 . n      - number of local columns (or `PETSC_DECIDE`)
5083 - scall  - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5084 
5085   Output Parameter:
5086 . mpimat - the parallel matrix generated
5087 
5088   Level: advanced
5089 
5090   Note:
5091   The dimensions of the sequential matrix in each processor MUST be the same.
5092   The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5093   destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`.
5094 
5095 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
5096 @*/
5097 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5098 {
5099   PetscMPIInt size;
5100 
5101   PetscFunctionBegin;
5102   PetscCallMPI(MPI_Comm_size(comm, &size));
5103   if (size == 1) {
5104     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5105     if (scall == MAT_INITIAL_MATRIX) {
5106       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5107     } else {
5108       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5109     }
5110     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5111     PetscFunctionReturn(PETSC_SUCCESS);
5112   }
5113   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5114   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5115   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5116   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5117   PetscFunctionReturn(PETSC_SUCCESS);
5118 }
5119 
5120 /*@
5121   MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix.
5122 
5123   Not Collective
5124 
5125   Input Parameter:
5126 . A - the matrix
5127 
5128   Output Parameter:
5129 . A_loc - the local sequential matrix generated
5130 
5131   Level: developer
5132 
5133   Notes:
5134   The matrix is created by taking `A`'s local rows and putting them into a sequential matrix
5135   with `mlocal` rows and `n` columns, where `mlocal` is obtained with `MatGetLocalSize()` and
5136   `n` is the global column count obtained with `MatGetSize()`
5137 
5138   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5139 
5140   For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.
5141 
5142   Destroy the matrix with `MatDestroy()`
5143 
5144 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5145 @*/
5146 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5147 {
5148   PetscBool mpi;
5149 
5150   PetscFunctionBegin;
5151   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5152   if (mpi) {
5153     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5154   } else {
5155     *A_loc = A;
5156     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5157   }
5158   PetscFunctionReturn(PETSC_SUCCESS);
5159 }
5160 
5161 /*@
5162   MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.
5163 
5164   Not Collective
5165 
5166   Input Parameters:
5167 + A     - the matrix
5168 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5169 
5170   Output Parameter:
5171 . A_loc - the local sequential matrix generated
5172 
5173   Level: developer
5174 
5175   Notes:
5176   The matrix is created by taking all `A`'s local rows and putting them into a sequential
5177   matrix with `mlocal` rows and `n` columns. `mlocal` is the row count obtained with
5178   `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.
5179 
5180   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5181 
5182   When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
5183   with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
5184   then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
5185   and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.
5186 
5187 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5188 @*/
5189 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
5190 {
5191   Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
5192   Mat_SeqAIJ        *mat, *a, *b;
5193   PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
5194   const PetscScalar *aa, *ba, *aav, *bav;
5195   PetscScalar       *ca, *cam;
5196   PetscMPIInt        size;
5197   PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
5198   PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
5199   PetscBool          match;
5200 
5201   PetscFunctionBegin;
5202   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
5203   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5204   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5205   if (size == 1) {
5206     if (scall == MAT_INITIAL_MATRIX) {
5207       PetscCall(PetscObjectReference((PetscObject)mpimat->A));
5208       *A_loc = mpimat->A;
5209     } else if (scall == MAT_REUSE_MATRIX) {
5210       PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
5211     }
5212     PetscFunctionReturn(PETSC_SUCCESS);
5213   }
5214 
5215   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5216   a  = (Mat_SeqAIJ *)mpimat->A->data;
5217   b  = (Mat_SeqAIJ *)mpimat->B->data;
5218   ai = a->i;
5219   aj = a->j;
5220   bi = b->i;
5221   bj = b->j;
5222   PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
5223   PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
5224   aa = aav;
5225   ba = bav;
5226   if (scall == MAT_INITIAL_MATRIX) {
5227     PetscCall(PetscMalloc1(1 + am, &ci));
5228     ci[0] = 0;
5229     for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
5230     PetscCall(PetscMalloc1(1 + ci[am], &cj));
5231     PetscCall(PetscMalloc1(1 + ci[am], &ca));
5232     k = 0;
5233     for (i = 0; i < am; i++) {
5234       ncols_o = bi[i + 1] - bi[i];
5235       ncols_d = ai[i + 1] - ai[i];
5236       /* off-diagonal portion of A */
5237       for (jo = 0; jo < ncols_o; jo++) {
5238         col = cmap[*bj];
5239         if (col >= cstart) break;
5240         cj[k] = col;
5241         bj++;
5242         ca[k++] = *ba++;
5243       }
5244       /* diagonal portion of A */
5245       for (j = 0; j < ncols_d; j++) {
5246         cj[k]   = cstart + *aj++;
5247         ca[k++] = *aa++;
5248       }
5249       /* off-diagonal portion of A */
5250       for (j = jo; j < ncols_o; j++) {
5251         cj[k]   = cmap[*bj++];
5252         ca[k++] = *ba++;
5253       }
5254     }
5255     /* put together the new matrix */
5256     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
5257     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5258     /* Since these are PETSc arrays, change flags to free them as necessary. */
5259     mat          = (Mat_SeqAIJ *)(*A_loc)->data;
5260     mat->free_a  = PETSC_TRUE;
5261     mat->free_ij = PETSC_TRUE;
5262     mat->nonew   = 0;
5263   } else if (scall == MAT_REUSE_MATRIX) {
5264     mat = (Mat_SeqAIJ *)(*A_loc)->data;
5265     ci  = mat->i;
5266     cj  = mat->j;
5267     PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
5268     for (i = 0; i < am; i++) {
5269       /* off-diagonal portion of A */
5270       ncols_o = bi[i + 1] - bi[i];
5271       for (jo = 0; jo < ncols_o; jo++) {
5272         col = cmap[*bj];
5273         if (col >= cstart) break;
5274         *cam++ = *ba++;
5275         bj++;
5276       }
5277       /* diagonal portion of A */
5278       ncols_d = ai[i + 1] - ai[i];
5279       for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
5280       /* off-diagonal portion of A */
5281       for (j = jo; j < ncols_o; j++) {
5282         *cam++ = *ba++;
5283         bj++;
5284       }
5285     }
5286     PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
5287   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5288   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
5289   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
5290   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5291   PetscFunctionReturn(PETSC_SUCCESS);
5292 }
5293 
5294 /*@
5295   MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5296   mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and off-diagonal part
5297 
5298   Not Collective
5299 
5300   Input Parameters:
5301 + A     - the matrix
5302 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5303 
5304   Output Parameters:
5305 + glob  - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5306 - A_loc - the local sequential matrix generated
5307 
5308   Level: developer
5309 
5310   Note:
5311   This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal
5312   part, then those associated with the off-diagonal part (in its local ordering)
5313 
5314 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5315 @*/
5316 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5317 {
5318   Mat             Ao, Ad;
5319   const PetscInt *cmap;
5320   PetscMPIInt     size;
5321   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5322 
5323   PetscFunctionBegin;
5324   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5325   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5326   if (size == 1) {
5327     if (scall == MAT_INITIAL_MATRIX) {
5328       PetscCall(PetscObjectReference((PetscObject)Ad));
5329       *A_loc = Ad;
5330     } else if (scall == MAT_REUSE_MATRIX) {
5331       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5332     }
5333     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5334     PetscFunctionReturn(PETSC_SUCCESS);
5335   }
5336   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5337   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5338   if (f) {
5339     PetscCall((*f)(A, scall, glob, A_loc));
5340   } else {
5341     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5342     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5343     Mat_SeqAIJ        *c;
5344     PetscInt          *ai = a->i, *aj = a->j;
5345     PetscInt          *bi = b->i, *bj = b->j;
5346     PetscInt          *ci, *cj;
5347     const PetscScalar *aa, *ba;
5348     PetscScalar       *ca;
5349     PetscInt           i, j, am, dn, on;
5350 
5351     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5352     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5353     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5354     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5355     if (scall == MAT_INITIAL_MATRIX) {
5356       PetscInt k;
5357       PetscCall(PetscMalloc1(1 + am, &ci));
5358       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5359       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5360       ci[0] = 0;
5361       for (i = 0, k = 0; i < am; i++) {
5362         const PetscInt ncols_o = bi[i + 1] - bi[i];
5363         const PetscInt ncols_d = ai[i + 1] - ai[i];
5364         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5365         /* diagonal portion of A */
5366         for (j = 0; j < ncols_d; j++, k++) {
5367           cj[k] = *aj++;
5368           ca[k] = *aa++;
5369         }
5370         /* off-diagonal portion of A */
5371         for (j = 0; j < ncols_o; j++, k++) {
5372           cj[k] = dn + *bj++;
5373           ca[k] = *ba++;
5374         }
5375       }
5376       /* put together the new matrix */
5377       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5378       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5379       /* Since these are PETSc arrays, change flags to free them as necessary. */
5380       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5381       c->free_a  = PETSC_TRUE;
5382       c->free_ij = PETSC_TRUE;
5383       c->nonew   = 0;
5384       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5385     } else if (scall == MAT_REUSE_MATRIX) {
5386       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5387       for (i = 0; i < am; i++) {
5388         const PetscInt ncols_d = ai[i + 1] - ai[i];
5389         const PetscInt ncols_o = bi[i + 1] - bi[i];
5390         /* diagonal portion of A */
5391         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5392         /* off-diagonal portion of A */
5393         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5394       }
5395       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5396     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5397     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5398     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa));
5399     if (glob) {
5400       PetscInt cst, *gidx;
5401 
5402       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5403       PetscCall(PetscMalloc1(dn + on, &gidx));
5404       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5405       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5406       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5407     }
5408   }
5409   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5410   PetscFunctionReturn(PETSC_SUCCESS);
5411 }
5412 
5413 /*@C
5414   MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5415 
5416   Not Collective
5417 
5418   Input Parameters:
5419 + A     - the matrix
5420 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5421 . row   - index set of rows to extract (or `NULL`)
5422 - col   - index set of columns to extract (or `NULL`)
5423 
5424   Output Parameter:
5425 . A_loc - the local sequential matrix generated
5426 
5427   Level: developer
5428 
5429 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5430 @*/
5431 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
5432 {
5433   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5434   PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
5435   IS          isrowa, iscola;
5436   Mat        *aloc;
5437   PetscBool   match;
5438 
5439   PetscFunctionBegin;
5440   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
5441   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5442   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5443   if (!row) {
5444     start = A->rmap->rstart;
5445     end   = A->rmap->rend;
5446     PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
5447   } else {
5448     isrowa = *row;
5449   }
5450   if (!col) {
5451     start = A->cmap->rstart;
5452     cmap  = a->garray;
5453     nzA   = a->A->cmap->n;
5454     nzB   = a->B->cmap->n;
5455     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5456     ncols = 0;
5457     for (i = 0; i < nzB; i++) {
5458       if (cmap[i] < start) idx[ncols++] = cmap[i];
5459       else break;
5460     }
5461     imark = i;
5462     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
5463     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
5464     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
5465   } else {
5466     iscola = *col;
5467   }
5468   if (scall != MAT_INITIAL_MATRIX) {
5469     PetscCall(PetscMalloc1(1, &aloc));
5470     aloc[0] = *A_loc;
5471   }
5472   PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
5473   if (!col) { /* attach global id of condensed columns */
5474     PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
5475   }
5476   *A_loc = aloc[0];
5477   PetscCall(PetscFree(aloc));
5478   if (!row) PetscCall(ISDestroy(&isrowa));
5479   if (!col) PetscCall(ISDestroy(&iscola));
5480   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5481   PetscFunctionReturn(PETSC_SUCCESS);
5482 }
5483 
5484 /*
5485  * Create a sequential AIJ matrix P_oth from the rows of P listed in 'rows': every nonzero of a listed row
5486  * (diagonal and off-diagonal part) is fetched from the process that owns that row, so a row may be local or
5487  * remote. The routine is designed to be scalable in memory so that nothing is based on a global size.
 *
 * Input:
 *   P     - parallel matrix; its data is accessed as Mat_MPIAIJ
 *   rows  - global row indices of P to gather
 * Output:
 *   P_oth - new SeqAIJ matrix with one row per entry of 'rows'; column indices are stored in global numbering
 *
 * Side effects visible in the body:
 *   - the column index arrays of P's diagonal and off-diagonal blocks are temporarily rewritten to global
 *     numbering while they are broadcast, and converted back before the routine returns;
 *   - the two PetscSF objects used for the exchange are composed on P_oth as "diagsf" and "offdiagsf".
5488  * */
5489 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
5490 {
5491   Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
5492   Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth;
5493   PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
5494   PetscMPIInt            owner;
5495   PetscSFNode           *iremote, *oiremote;
5496   const PetscInt        *lrowindices;
5497   PetscSF                sf, osf;
5498   PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
5499   PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
5500   MPI_Comm               comm;
5501   ISLocalToGlobalMapping mapping;
5502   const PetscScalar     *pd_a, *po_a;
5503 
5504   PetscFunctionBegin;
5505   PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
5506   /* plocalsize is the number of roots
5507    * nrows is the number of leaves
5508    * */
5509   PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
5510   PetscCall(ISGetLocalSize(rows, &nrows));
5511   PetscCall(PetscCalloc1(nrows, &iremote));
5512   PetscCall(ISGetIndices(rows, &lrowindices));
5513   for (i = 0; i < nrows; i++) {
5514     /* Find a remote index and an owner for a row
5515      * The row could be local or remote
5516      * */
5517     owner = 0;
5518     lidx  = 0;
5519     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
5520     iremote[i].index = lidx;
5521     iremote[i].rank  = owner;
5522   }
5523   /* Create SF to communicate how many nonzero columns for each row */
5524   PetscCall(PetscSFCreate(comm, &sf));
5525   /* SF will figure out the number of nonzero columns for each row, and their
5526    * offsets
5527    * */
  /* iremote is handed to the SF with PETSC_OWN_POINTER; the variable is allocated again further down */
5528   PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5529   PetscCall(PetscSFSetFromOptions(sf));
5530   PetscCall(PetscSFSetUp(sf));
5531 
  /* Root-side data: per local row of P, a pair (diagonal, off-diagonal) of counts and of start offsets */
5532   PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
5533   PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
5534   PetscCall(PetscCalloc1(nrows, &pnnz));
5535   roffsets[0] = 0;
5536   roffsets[1] = 0;
5537   for (i = 0; i < plocalsize; i++) {
5538     /* diagonal */
5539     nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
5540     /* off-diagonal */
5541     nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
5542     /* running offsets: where each row starts inside the diagonal / off-diagonal arrays */
5543     roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
5544     roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
5545   }
5546   PetscCall(PetscCalloc1(2 * nrows, &nlcols));
5547   PetscCall(PetscCalloc1(2 * nrows, &loffsets));
5548   /* 'r' means root, and 'l' means leaf */
  /* MPIU_2INT moves the (diagonal, off-diagonal) pair of each row as one unit */
5549   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5550   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5551   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5552   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5553   PetscCall(PetscSFDestroy(&sf));
5554   PetscCall(PetscFree(roffsets));
5555   PetscCall(PetscFree(nrcols));
  /* Totals over the requested rows; ncol becomes the largest row length and is used as the column count of P_oth */
5556   dntotalcols = 0;
5557   ontotalcols = 0;
5558   ncol        = 0;
5559   for (i = 0; i < nrows; i++) {
5560     pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
5561     ncol    = PetscMax(pnnz[i], ncol);
5562     /* diagonal */
5563     dntotalcols += nlcols[i * 2 + 0];
5564     /* off-diagonal */
5565     ontotalcols += nlcols[i * 2 + 1];
5566   }
5567   /* We do not need to figure the right number of columns
5568    * since all the calculations will be done by going through the raw data
5569    * */
5570   PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
5571   PetscCall(MatSetUp(*P_oth));
5572   PetscCall(PetscFree(pnnz));
5573   p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5574   /* diagonal */
5575   PetscCall(PetscCalloc1(dntotalcols, &iremote));
5576   /* off-diagonal */
5577   PetscCall(PetscCalloc1(ontotalcols, &oiremote));
5578   /* diagonal */
5579   PetscCall(PetscCalloc1(dntotalcols, &ilocal));
5580   /* off-diagonal */
5581   PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  /* The totals are reset and reused as running counters while the two leaf graphs are filled in */
5582   dntotalcols = 0;
5583   ontotalcols = 0;
5584   ntotalcols  = 0;
5585   for (i = 0; i < nrows; i++) {
5586     owner = 0;
5587     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
5588     /* Set iremote for diag matrix */
5589     for (j = 0; j < nlcols[i * 2 + 0]; j++) {
5590       iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
5591       iremote[dntotalcols].rank  = owner;
5592       /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      /* NOTE(review): using the running index ntotalcols as the leaf position assumes the rows of P_oth are stored back to back in p_oth->a / p_oth->j - confirm */
5593       ilocal[dntotalcols++] = ntotalcols++;
5594     }
5595     /* off-diagonal */
5596     for (j = 0; j < nlcols[i * 2 + 1]; j++) {
5597       oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
5598       oiremote[ontotalcols].rank  = owner;
5599       oilocal[ontotalcols++]      = ntotalcols++;
5600     }
5601   }
5602   PetscCall(ISRestoreIndices(rows, &lrowindices));
5603   PetscCall(PetscFree(loffsets));
5604   PetscCall(PetscFree(nlcols));
5605   PetscCall(PetscSFCreate(comm, &sf));
5606   /* P serves as roots and P_oth is leaves
5607    * Diag matrix
5608    * */
5609   PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5610   PetscCall(PetscSFSetFromOptions(sf));
5611   PetscCall(PetscSFSetUp(sf));
5612 
5613   PetscCall(PetscSFCreate(comm, &osf));
5614   /* off-diagonal */
5615   PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
5616   PetscCall(PetscSFSetFromOptions(osf));
5617   PetscCall(PetscSFSetUp(osf));
5618   PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5619   PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5620   /* operate on the matrix internal data to save memory */
  /* The value broadcasts started here are completed further down, after the column index exchange */
5621   PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5622   PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5623   PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
5624   /* Convert to global indices for diag matrix */
5625   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
5626   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5627   /* We want P_oth store global indices */
5628   PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
5629   /* Use memory scalable approach */
5630   PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  /* po->j is overwritten in place with global indices; it is mapped back after the broadcast ends */
5631   PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
5632   PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5633   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5634   /* Convert back to local indices */
5635   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
5636   PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5637   nout = 0;
5638   PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  /* every index must map back, otherwise po->j was not restored completely */
5639   PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
5640   PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
5641   /* Exchange values */
5642   PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5643   PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5644   PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5645   PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5646   /* Stop PETSc from shrinking memory */
  /* the arrays were filled directly above, so each row is marked as full (ilen = imax) before assembly */
5647   for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
5648   PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
5649   PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
5650   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5651   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
5652   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  /* the local handles are released here; the SFs remain composed on P_oth */
5653   PetscCall(PetscSFDestroy(&sf));
5654   PetscCall(PetscSFDestroy(&osf));
5655   PetscFunctionReturn(PETSC_SUCCESS);
5656 }
5657 
5658 /*
5659  * Creates a SeqAIJ matrix by taking the rows of P (the "B" of the function name) that correspond to nonzero columns of local A
5660  * This supports MPIAIJ and MAIJ
5661  * */
5662 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
5663 {
5664   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
5665   Mat_SeqAIJ *p_oth;
5666   IS          rows, map;
5667   PetscHMapI  hamp;
5668   PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
5669   MPI_Comm    comm;
5670   PetscSF     sf, osf;
5671   PetscBool   has;
5672 
5673   PetscFunctionBegin;
5674   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5675   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
5676   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5677    *  and then create a submatrix (that often is an overlapping matrix)
5678    * */
5679   if (reuse == MAT_INITIAL_MATRIX) {
5680     /* Use a hash table to figure out unique keys */
5681     PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
5682     PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
5683     count = 0;
5684     /* Assume that  a->g is sorted, otherwise the following does not make sense */
5685     for (i = 0; i < a->B->cmap->n; i++) {
5686       key = a->garray[i] / dof;
5687       PetscCall(PetscHMapIHas(hamp, key, &has));
5688       if (!has) {
5689         mapping[i] = count;
5690         PetscCall(PetscHMapISet(hamp, key, count++));
5691       } else {
5692         /* Current 'i' has the same value the previous step */
5693         mapping[i] = count - 1;
5694       }
5695     }
5696     PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
5697     PetscCall(PetscHMapIGetSize(hamp, &htsize));
5698     PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
5699     PetscCall(PetscCalloc1(htsize, &rowindices));
5700     off = 0;
5701     PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
5702     PetscCall(PetscHMapIDestroy(&hamp));
5703     PetscCall(PetscSortInt(htsize, rowindices));
5704     PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
5705     /* In case, the matrix was already created but users want to recreate the matrix */
5706     PetscCall(MatDestroy(P_oth));
5707     PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
5708     PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
5709     PetscCall(ISDestroy(&map));
5710     PetscCall(ISDestroy(&rows));
5711   } else if (reuse == MAT_REUSE_MATRIX) {
5712     /* If matrix was already created, we simply update values using SF objects
5713      * that as attached to the matrix earlier.
5714      */
5715     const PetscScalar *pd_a, *po_a;
5716 
5717     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
5718     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
5719     PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
5720     p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5721     /* Update values in place */
5722     PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5723     PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5724     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5725     PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5726     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5727     PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5728     PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5729     PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5730   } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
5731   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
5732   PetscFunctionReturn(PETSC_SUCCESS);
5733 }
5734 
5735 /*@C
5736   MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A`
5737 
5738   Collective
5739 
5740   Input Parameters:
5741 + A     - the first matrix in `MATMPIAIJ` format
5742 . B     - the second matrix in `MATMPIAIJ` format
5743 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5744 
5745   Output Parameters:
5746 + rowb  - On input index sets of rows of B to extract (or `NULL`), modified on output
5747 . colb  - On input index sets of columns of B to extract (or `NULL`), modified on output
5748 - B_seq - the sequential matrix generated
5749 
5750   Level: developer
5751 
5752 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
5753 @*/
5754 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5755 {
5756   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5757   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5758   IS          isrowb, iscolb;
5759   Mat        *bseq = NULL;
5760 
5761   PetscFunctionBegin;
5762   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5763              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5764   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5765 
5766   if (scall == MAT_INITIAL_MATRIX) {
5767     start = A->cmap->rstart;
5768     cmap  = a->garray;
5769     nzA   = a->A->cmap->n;
5770     nzB   = a->B->cmap->n;
5771     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5772     ncols = 0;
5773     for (i = 0; i < nzB; i++) { /* row < local row index */
5774       if (cmap[i] < start) idx[ncols++] = cmap[i];
5775       else break;
5776     }
5777     imark = i;
5778     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5779     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5780     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5781     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5782   } else {
5783     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5784     isrowb = *rowb;
5785     iscolb = *colb;
5786     PetscCall(PetscMalloc1(1, &bseq));
5787     bseq[0] = *B_seq;
5788   }
5789   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5790   *B_seq = bseq[0];
5791   PetscCall(PetscFree(bseq));
5792   if (!rowb) {
5793     PetscCall(ISDestroy(&isrowb));
5794   } else {
5795     *rowb = isrowb;
5796   }
5797   if (!colb) {
5798     PetscCall(ISDestroy(&iscolb));
5799   } else {
5800     *colb = iscolb;
5801   }
5802   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5803   PetscFunctionReturn(PETSC_SUCCESS);
5804 }
5805 
5806 /*
5807     MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns
5808     of the OFF-DIAGONAL portion of local A
5809 
5810     Collective
5811 
5812    Input Parameters:
5813 +    A,B - the matrices in `MATMPIAIJ` format
5814 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5815 
5816    Output Parameter:
5817 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5818 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5819 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5820 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5821 
5822     Developer Note:
5823     This directly accesses information inside the VecScatter associated with the matrix-vector product
5824      for this matrix. This is not desirable..
5825 
5826     Level: developer
5827 
5828 */
5829 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5830 {
5831   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5832   Mat_SeqAIJ        *b_oth;
5833   VecScatter         ctx;
5834   MPI_Comm           comm;
5835   const PetscMPIInt *rprocs, *sprocs;
5836   const PetscInt    *srow, *rstarts, *sstarts;
5837   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5838   PetscInt           i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len;
5839   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5840   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5841   PetscMPIInt        size, tag, rank, nreqs;
5842 
5843   PetscFunctionBegin;
5844   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5845   PetscCallMPI(MPI_Comm_size(comm, &size));
5846 
5847   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5848              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5849   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5850   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5851 
5852   if (size == 1) {
5853     startsj_s = NULL;
5854     bufa_ptr  = NULL;
5855     *B_oth    = NULL;
5856     PetscFunctionReturn(PETSC_SUCCESS);
5857   }
5858 
5859   ctx = a->Mvctx;
5860   tag = ((PetscObject)ctx)->tag;
5861 
5862   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5863   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5864   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5865   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5866   PetscCall(PetscMalloc1(nreqs, &reqs));
5867   rwaits = reqs;
5868   swaits = PetscSafePointerPlusOffset(reqs, nrecvs);
5869 
5870   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5871   if (scall == MAT_INITIAL_MATRIX) {
5872     /* i-array */
5873     /*  post receives */
5874     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5875     for (i = 0; i < nrecvs; i++) {
5876       rowlen = rvalues + rstarts[i] * rbs;
5877       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5878       PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5879     }
5880 
5881     /* pack the outgoing message */
5882     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5883 
5884     sstartsj[0] = 0;
5885     rstartsj[0] = 0;
5886     len         = 0; /* total length of j or a array to be sent */
5887     if (nsends) {
5888       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5889       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5890     }
5891     for (i = 0; i < nsends; i++) {
5892       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5893       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5894       for (j = 0; j < nrows; j++) {
5895         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5896         for (l = 0; l < sbs; l++) {
5897           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5898 
5899           rowlen[j * sbs + l] = ncols;
5900 
5901           len += ncols;
5902           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5903         }
5904         k++;
5905       }
5906       PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5907 
5908       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5909     }
5910     /* recvs and sends of i-array are completed */
5911     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5912     PetscCall(PetscFree(svalues));
5913 
5914     /* allocate buffers for sending j and a arrays */
5915     PetscCall(PetscMalloc1(len + 1, &bufj));
5916     PetscCall(PetscMalloc1(len + 1, &bufa));
5917 
5918     /* create i-array of B_oth */
5919     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5920 
5921     b_othi[0] = 0;
5922     len       = 0; /* total length of j or a array to be received */
5923     k         = 0;
5924     for (i = 0; i < nrecvs; i++) {
5925       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5926       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5927       for (j = 0; j < nrows; j++) {
5928         b_othi[k + 1] = b_othi[k] + rowlen[j];
5929         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5930         k++;
5931       }
5932       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5933     }
5934     PetscCall(PetscFree(rvalues));
5935 
5936     /* allocate space for j and a arrays of B_oth */
5937     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5938     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5939 
5940     /* j-array */
5941     /*  post receives of j-array */
5942     for (i = 0; i < nrecvs; i++) {
5943       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5944       PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5945     }
5946 
5947     /* pack the outgoing message j-array */
5948     if (nsends) k = sstarts[0];
5949     for (i = 0; i < nsends; i++) {
5950       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5951       bufJ  = bufj + sstartsj[i];
5952       for (j = 0; j < nrows; j++) {
5953         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5954         for (ll = 0; ll < sbs; ll++) {
5955           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5956           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5957           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5958         }
5959       }
5960       PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5961     }
5962 
5963     /* recvs and sends of j-array are completed */
5964     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5965   } else if (scall == MAT_REUSE_MATRIX) {
5966     sstartsj = *startsj_s;
5967     rstartsj = *startsj_r;
5968     bufa     = *bufa_ptr;
5969     b_oth    = (Mat_SeqAIJ *)(*B_oth)->data;
5970     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5971   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5972 
5973   /* a-array */
5974   /*  post receives of a-array */
5975   for (i = 0; i < nrecvs; i++) {
5976     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5977     PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
5978   }
5979 
5980   /* pack the outgoing message a-array */
5981   if (nsends) k = sstarts[0];
5982   for (i = 0; i < nsends; i++) {
5983     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5984     bufA  = bufa + sstartsj[i];
5985     for (j = 0; j < nrows; j++) {
5986       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5987       for (ll = 0; ll < sbs; ll++) {
5988         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5989         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
5990         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5991       }
5992     }
5993     PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
5994   }
5995   /* recvs and sends of a-array are completed */
5996   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5997   PetscCall(PetscFree(reqs));
5998 
5999   if (scall == MAT_INITIAL_MATRIX) {
6000     /* put together the new matrix */
6001     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
6002 
6003     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
6004     /* Since these are PETSc arrays, change flags to free them as necessary. */
6005     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
6006     b_oth->free_a  = PETSC_TRUE;
6007     b_oth->free_ij = PETSC_TRUE;
6008     b_oth->nonew   = 0;
6009 
6010     PetscCall(PetscFree(bufj));
6011     if (!startsj_s || !bufa_ptr) {
6012       PetscCall(PetscFree2(sstartsj, rstartsj));
6013       PetscCall(PetscFree(bufa_ptr));
6014     } else {
6015       *startsj_s = sstartsj;
6016       *startsj_r = rstartsj;
6017       *bufa_ptr  = bufa;
6018     }
6019   } else if (scall == MAT_REUSE_MATRIX) {
6020     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
6021   }
6022 
6023   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
6024   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
6025   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
6026   PetscFunctionReturn(PETSC_SUCCESS);
6027 }
6028 
/*
   Prototypes of the MatConvert_*() routines (plus MatProductSetFromOptions_IS_XAIJ()) that are declared here
   without a body. The ones wrapped in #if defined(PETSC_HAVE_...) are only declared when that macro is defined.
*/
6029 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
6030 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
6031 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
6032 #if defined(PETSC_HAVE_MKL_SPARSE)
6033 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
6034 #endif
6035 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
6036 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
6037 #if defined(PETSC_HAVE_ELEMENTAL)
6038 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
6039 #endif
6040 #if defined(PETSC_HAVE_SCALAPACK)
6041 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6042 #endif
6043 #if defined(PETSC_HAVE_HYPRE)
6044 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
6045 #endif
6046 #if defined(PETSC_HAVE_CUDA)
6047 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
6048 #endif
6049 #if defined(PETSC_HAVE_HIP)
6050 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
6051 #endif
6052 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6053 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6054 #endif
6055 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6056 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6057 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6058 
6059 /*
6060     Computes (B'*A')' since computing B*A directly is untenable
6061 
6062                n                       p                          p
6063         [             ]       [             ]         [                 ]
6064       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6065         [             ]       [             ]         [                 ]
6066 
6067 */
6068 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6069 {
6070   Mat At, Bt, Ct;
6071 
6072   PetscFunctionBegin;
6073   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6074   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6075   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
6076   PetscCall(MatDestroy(&At));
6077   PetscCall(MatDestroy(&Bt));
6078   PetscCall(MatTransposeSetPrecursor(Ct, C));
6079   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6080   PetscCall(MatDestroy(&Ct));
6081   PetscFunctionReturn(PETSC_SUCCESS);
6082 }
6083 
6084 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6085 {
6086   PetscBool cisdense;
6087 
6088   PetscFunctionBegin;
6089   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6090   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6091   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6092   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6093   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6094   PetscCall(MatSetUp(C));
6095 
6096   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6097   PetscFunctionReturn(PETSC_SUCCESS);
6098 }
6099 
6100 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6101 {
6102   Mat_Product *product = C->product;
6103   Mat          A = product->A, B = product->B;
6104 
6105   PetscFunctionBegin;
6106   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
6107              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6108   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6109   C->ops->productsymbolic = MatProductSymbolic_AB;
6110   PetscFunctionReturn(PETSC_SUCCESS);
6111 }
6112 
6113 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6114 {
6115   Mat_Product *product = C->product;
6116 
6117   PetscFunctionBegin;
6118   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6119   PetscFunctionReturn(PETSC_SUCCESS);
6120 }
6121 
6122 /*
6123    Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6124 
6125   Input Parameters:
6126 
6127     j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
6128     j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)
6129 
6130     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6131 
6132     For Set1, j1[] contains column indices of the nonzeros.
6133     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
6134     respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted,
6135     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6136 
6137     Similar for Set2.
6138 
6139     This routine merges the two sets of nonzeros row by row and removes repeats.
6140 
6141   Output Parameters: (memory is allocated by the caller)
6142 
6143     i[],j[]: the CSR of the merged matrix, which has m rows.
6144     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6145     imap2[]: similar to imap1[], but for Set2.
6146     Note we order nonzeros row-by-row and from left to right.
6147 */
6148 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
6149 {
6150   PetscInt   r, m; /* Row index of mat */
6151   PetscCount t, t1, t2, b1, e1, b2, e2;
6152 
6153   PetscFunctionBegin;
6154   PetscCall(MatGetLocalSize(mat, &m, NULL));
6155   t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */
6156   i[0]        = 0;
6157   for (r = 0; r < m; r++) { /* Do row by row merging */
6158     b1 = rowBegin1[r];
6159     e1 = rowEnd1[r];
6160     b2 = rowBegin2[r];
6161     e2 = rowEnd2[r];
6162     while (b1 < e1 && b2 < e2) {
6163       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6164         j[t]      = j1[b1];
6165         imap1[t1] = t;
6166         imap2[t2] = t;
6167         b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
6168         b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
6169         t1++;
6170         t2++;
6171         t++;
6172       } else if (j1[b1] < j2[b2]) {
6173         j[t]      = j1[b1];
6174         imap1[t1] = t;
6175         b1 += jmap1[t1 + 1] - jmap1[t1];
6176         t1++;
6177         t++;
6178       } else {
6179         j[t]      = j2[b2];
6180         imap2[t2] = t;
6181         b2 += jmap2[t2 + 1] - jmap2[t2];
6182         t2++;
6183         t++;
6184       }
6185     }
6186     /* Merge the remaining in either j1[] or j2[] */
6187     while (b1 < e1) {
6188       j[t]      = j1[b1];
6189       imap1[t1] = t;
6190       b1 += jmap1[t1 + 1] - jmap1[t1];
6191       t1++;
6192       t++;
6193     }
6194     while (b2 < e2) {
6195       j[t]      = j2[b2];
6196       imap2[t2] = t;
6197       b2 += jmap2[t2 + 1] - jmap2[t2];
6198       t2++;
6199       t++;
6200     }
6201     i[r + 1] = t;
6202   }
6203   PetscFunctionReturn(PETSC_SUCCESS);
6204 }
6205 
6206 /*
6207   Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6208 
6209   Input Parameters:
6210     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6211     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6212       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6213 
6214       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6215       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6216 
6217   Output Parameters:
6218     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6219     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6220       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6221       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6222 
6223     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6224       Atot: number of entries belonging to the diagonal block.
6225       Annz: number of unique nonzeros belonging to the diagonal block.
6226       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6227         repeats (i.e., same 'i,j' pair).
6228       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6229         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6230 
6231       Atot: number of entries belonging to the diagonal block
6232       Annz: number of unique nonzeros belonging to the diagonal block.
6233 
6234     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6235 
6236     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6237 */
6238 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
6239 {
6240   PetscInt    cstart, cend, rstart, rend, row, col;
6241   PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6242   PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6243   PetscCount  k, m, p, q, r, s, mid;
6244   PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;
6245 
6246   PetscFunctionBegin;
6247   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6248   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6249   m = rend - rstart;
6250 
6251   /* Skip negative rows */
6252   for (k = 0; k < n; k++)
6253     if (i[k] >= 0) break;
6254 
6255   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6256      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6257   */
6258   while (k < n) {
6259     row = i[k];
6260     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6261     for (s = k; s < n; s++)
6262       if (i[s] != row) break;
6263 
6264     /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */
6265     for (p = k; p < s; p++) {
6266       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT;
6267       else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
6268     }
6269     PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
6270     PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6271     rowBegin[row - rstart] = k;
6272     rowMid[row - rstart]   = mid;
6273     rowEnd[row - rstart]   = s;
6274 
6275     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6276     Atot += mid - k;
6277     Btot += s - mid;
6278 
6279     /* Count unique nonzeros of this diag row */
6280     for (p = k; p < mid;) {
6281       col = j[p];
6282       do {
6283         j[p] += PETSC_MAX_INT; /* Revert the modified diagonal indices */
6284         p++;
6285       } while (p < mid && j[p] == col);
6286       Annz++;
6287     }
6288 
6289     /* Count unique nonzeros of this offdiag row */
6290     for (p = mid; p < s;) {
6291       col = j[p];
6292       do {
6293         p++;
6294       } while (p < s && j[p] == col);
6295       Bnnz++;
6296     }
6297     k = s;
6298   }
6299 
6300   /* Allocation according to Atot, Btot, Annz, Bnnz */
6301   PetscCall(PetscMalloc1(Atot, &Aperm));
6302   PetscCall(PetscMalloc1(Btot, &Bperm));
6303   PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
6304   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));
6305 
6306   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6307   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6308   for (r = 0; r < m; r++) {
6309     k   = rowBegin[r];
6310     mid = rowMid[r];
6311     s   = rowEnd[r];
6312     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
6313     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
6314     Atot += mid - k;
6315     Btot += s - mid;
6316 
6317     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6318     for (p = k; p < mid;) {
6319       col = j[p];
6320       q   = p;
6321       do {
6322         p++;
6323       } while (p < mid && j[p] == col);
6324       Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
6325       Annz++;
6326     }
6327 
6328     for (p = mid; p < s;) {
6329       col = j[p];
6330       q   = p;
6331       do {
6332         p++;
6333       } while (p < s && j[p] == col);
6334       Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
6335       Bnnz++;
6336     }
6337   }
6338   /* Output */
6339   *Aperm_ = Aperm;
6340   *Annz_  = Annz;
6341   *Atot_  = Atot;
6342   *Ajmap_ = Ajmap;
6343   *Bperm_ = Bperm;
6344   *Bnnz_  = Bnnz;
6345   *Btot_  = Btot;
6346   *Bjmap_ = Bjmap;
6347   PetscFunctionReturn(PETSC_SUCCESS);
6348 }
6349 
6350 /*
6351   Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6352 
6353   Input Parameters:
6354     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6355     nnz:  number of unique nonzeros in the merged matrix
6356     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6357     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6358 
6359   Output Parameter: (memory is allocated by the caller)
6360     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6361 
6362   Example:
6363     nnz1 = 4
6364     nnz  = 6
6365     imap = [1,3,4,5]
6366     jmap = [0,3,5,6,7]
6367    then,
6368     jmap_new = [0,0,3,3,5,6,7]
6369 */
6370 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6371 {
6372   PetscCount k, p;
6373 
6374   PetscFunctionBegin;
6375   jmap_new[0] = 0;
6376   p           = nnz;                /* p loops over jmap_new[] backwards */
6377   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6378     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6379   }
6380   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6381   PetscFunctionReturn(PETSC_SUCCESS);
6382 }
6383 
6384 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data)
6385 {
6386   MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data;
6387 
6388   PetscFunctionBegin;
6389   PetscCall(PetscSFDestroy(&coo->sf));
6390   PetscCall(PetscFree(coo->Aperm1));
6391   PetscCall(PetscFree(coo->Bperm1));
6392   PetscCall(PetscFree(coo->Ajmap1));
6393   PetscCall(PetscFree(coo->Bjmap1));
6394   PetscCall(PetscFree(coo->Aimap2));
6395   PetscCall(PetscFree(coo->Bimap2));
6396   PetscCall(PetscFree(coo->Aperm2));
6397   PetscCall(PetscFree(coo->Bperm2));
6398   PetscCall(PetscFree(coo->Ajmap2));
6399   PetscCall(PetscFree(coo->Bjmap2));
6400   PetscCall(PetscFree(coo->Cperm1));
6401   PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
6402   PetscCall(PetscFree(coo));
6403   PetscFunctionReturn(PETSC_SUCCESS);
6404 }
6405 
6406 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6407 {
6408   MPI_Comm             comm;
6409   PetscMPIInt          rank, size;
6410   PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6411   PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
6412   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6413   PetscContainer       container;
6414   MatCOOStruct_MPIAIJ *coo;
6415 
6416   PetscFunctionBegin;
6417   PetscCall(PetscFree(mpiaij->garray));
6418   PetscCall(VecDestroy(&mpiaij->lvec));
6419 #if defined(PETSC_USE_CTABLE)
6420   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6421 #else
6422   PetscCall(PetscFree(mpiaij->colmap));
6423 #endif
6424   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6425   mat->assembled     = PETSC_FALSE;
6426   mat->was_assembled = PETSC_FALSE;
6427 
6428   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6429   PetscCallMPI(MPI_Comm_size(comm, &size));
6430   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6431   PetscCall(PetscLayoutSetUp(mat->rmap));
6432   PetscCall(PetscLayoutSetUp(mat->cmap));
6433   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6434   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6435   PetscCall(MatGetLocalSize(mat, &m, &n));
6436   PetscCall(MatGetSize(mat, &M, &N));
6437 
6438   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6439   /* entries come first, then local rows, then remote rows.                     */
6440   PetscCount n1 = coo_n, *perm1;
6441   PetscInt  *i1 = coo_i, *j1 = coo_j;
6442 
6443   PetscCall(PetscMalloc1(n1, &perm1));
6444   for (k = 0; k < n1; k++) perm1[k] = k;
6445 
6446   /* Manipulate indices so that entries with negative row or col indices will have smallest
6447      row indices, local entries will have greater but negative row indices, and remote entries
6448      will have positive row indices.
6449   */
6450   for (k = 0; k < n1; k++) {
6451     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT;                /* e.g., -2^31, minimal to move them ahead */
6452     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6453     else {
6454       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6455       if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6456     }
6457   }
6458 
6459   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6460   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6461 
6462   /* Advance k to the first entry we need to take care of */
6463   for (k = 0; k < n1; k++)
6464     if (i1[k] > PETSC_MIN_INT) break;
6465   PetscInt i1start = k;
6466 
6467   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */
6468   for (; k < rem; k++) i1[k] += PETSC_MAX_INT;                                    /* Revert row indices of local rows*/
6469 
6470   /*           Send remote rows to their owner                                  */
6471   /* Find which rows should be sent to which remote ranks*/
6472   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6473   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6474   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6475   const PetscInt *ranges;
6476   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6477 
6478   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6479   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6480   for (k = rem; k < n1;) {
6481     PetscMPIInt owner;
6482     PetscInt    firstRow, lastRow;
6483 
6484     /* Locate a row range */
6485     firstRow = i1[k]; /* first row of this owner */
6486     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6487     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6488 
6489     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6490     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6491 
6492     /* All entries in [k,p) belong to this remote owner */
6493     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6494       PetscMPIInt *sendto2;
6495       PetscInt    *nentries2;
6496       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6497 
6498       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6499       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6500       PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1));
6501       PetscCall(PetscFree2(sendto, nentries2));
6502       sendto   = sendto2;
6503       nentries = nentries2;
6504       maxNsend = maxNsend2;
6505     }
6506     sendto[nsend]   = owner;
6507     nentries[nsend] = p - k;
6508     PetscCall(PetscCountCast(p - k, &nentries[nsend]));
6509     nsend++;
6510     k = p;
6511   }
6512 
6513   /* Build 1st SF to know offsets on remote to send data */
6514   PetscSF      sf1;
6515   PetscInt     nroots = 1, nroots2 = 0;
6516   PetscInt     nleaves = nsend, nleaves2 = 0;
6517   PetscInt    *offsets;
6518   PetscSFNode *iremote;
6519 
6520   PetscCall(PetscSFCreate(comm, &sf1));
6521   PetscCall(PetscMalloc1(nsend, &iremote));
6522   PetscCall(PetscMalloc1(nsend, &offsets));
6523   for (k = 0; k < nsend; k++) {
6524     iremote[k].rank  = sendto[k];
6525     iremote[k].index = 0;
6526     nleaves2 += nentries[k];
6527     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6528   }
6529   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6530   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6531   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6532   PetscCall(PetscSFDestroy(&sf1));
6533   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem);
6534 
6535   /* Build 2nd SF to send remote COOs to their owner */
6536   PetscSF sf2;
6537   nroots  = nroots2;
6538   nleaves = nleaves2;
6539   PetscCall(PetscSFCreate(comm, &sf2));
6540   PetscCall(PetscSFSetFromOptions(sf2));
6541   PetscCall(PetscMalloc1(nleaves, &iremote));
6542   p = 0;
6543   for (k = 0; k < nsend; k++) {
6544     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6545     for (q = 0; q < nentries[k]; q++, p++) {
6546       iremote[p].rank  = sendto[k];
6547       iremote[p].index = offsets[k] + q;
6548     }
6549   }
6550   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6551 
6552   /* Send the remote COOs to their owner */
6553   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6554   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6555   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6556   PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6557   PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6558   PetscInt *i1prem = i1 ? i1 + rem : NULL; /* silence ubsan warnings about pointer arithmetic on null pointer */
6559   PetscInt *j1prem = j1 ? j1 + rem : NULL;
6560   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6561   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE));
6562   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6563   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE));
6564 
6565   PetscCall(PetscFree(offsets));
6566   PetscCall(PetscFree2(sendto, nentries));
6567 
6568   /* Sort received COOs by row along with the permutation array     */
6569   for (k = 0; k < n2; k++) perm2[k] = k;
6570   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6571 
6572   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6573   PetscCount *Cperm1;
6574   PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6575   PetscCount *perm1prem = perm1 ? perm1 + rem : NULL;
6576   PetscCall(PetscMalloc1(nleaves, &Cperm1));
6577   PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves));
6578 
6579   /* Support for HYPRE matrices, kind of a hack.
6580      Swap min column with diagonal so that diagonal values will go first */
6581   PetscBool   hypre;
6582   const char *name;
6583   PetscCall(PetscObjectGetName((PetscObject)mat, &name));
6584   PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", name, &hypre));
6585   if (hypre) {
6586     PetscInt *minj;
6587     PetscBT   hasdiag;
6588 
6589     PetscCall(PetscBTCreate(m, &hasdiag));
6590     PetscCall(PetscMalloc1(m, &minj));
6591     for (k = 0; k < m; k++) minj[k] = PETSC_MAX_INT;
6592     for (k = i1start; k < rem; k++) {
6593       if (j1[k] < cstart || j1[k] >= cend) continue;
6594       const PetscInt rindex = i1[k] - rstart;
6595       if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6596       minj[rindex] = PetscMin(minj[rindex], j1[k]);
6597     }
6598     for (k = 0; k < n2; k++) {
6599       if (j2[k] < cstart || j2[k] >= cend) continue;
6600       const PetscInt rindex = i2[k] - rstart;
6601       if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6602       minj[rindex] = PetscMin(minj[rindex], j2[k]);
6603     }
6604     for (k = i1start; k < rem; k++) {
6605       const PetscInt rindex = i1[k] - rstart;
6606       if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6607       if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart);
6608       else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex];
6609     }
6610     for (k = 0; k < n2; k++) {
6611       const PetscInt rindex = i2[k] - rstart;
6612       if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6613       if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart);
6614       else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex];
6615     }
6616     PetscCall(PetscBTDestroy(&hasdiag));
6617     PetscCall(PetscFree(minj));
6618   }
6619 
6620   /* Split local COOs and received COOs into diag/offdiag portions */
6621   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6622   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1;
6623   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6624   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6625   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6626   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6627 
6628   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6629   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6630   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6631   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6632 
6633   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6634   PetscInt *Ai, *Bi;
6635   PetscInt *Aj, *Bj;
6636 
6637   PetscCall(PetscMalloc1(m + 1, &Ai));
6638   PetscCall(PetscMalloc1(m + 1, &Bi));
6639   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6640   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6641 
6642   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6643   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6644   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6645   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6646   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6647 
6648   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6649   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6650 
6651   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6652   /* expect nonzeros in A/B most likely have local contributing entries        */
6653   PetscInt    Annz = Ai[m];
6654   PetscInt    Bnnz = Bi[m];
6655   PetscCount *Ajmap1_new, *Bjmap1_new;
6656 
6657   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6658   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6659 
6660   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6661   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6662 
6663   PetscCall(PetscFree(Aimap1));
6664   PetscCall(PetscFree(Ajmap1));
6665   PetscCall(PetscFree(Bimap1));
6666   PetscCall(PetscFree(Bjmap1));
6667   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6668   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6669   PetscCall(PetscFree(perm1));
6670   PetscCall(PetscFree3(i2, j2, perm2));
6671 
6672   Ajmap1 = Ajmap1_new;
6673   Bjmap1 = Bjmap1_new;
6674 
6675   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6676   if (Annz < Annz1 + Annz2) {
6677     PetscInt *Aj_new;
6678     PetscCall(PetscMalloc1(Annz, &Aj_new));
6679     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6680     PetscCall(PetscFree(Aj));
6681     Aj = Aj_new;
6682   }
6683 
6684   if (Bnnz < Bnnz1 + Bnnz2) {
6685     PetscInt *Bj_new;
6686     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6687     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6688     PetscCall(PetscFree(Bj));
6689     Bj = Bj_new;
6690   }
6691 
6692   /* Create new submatrices for on-process and off-process coupling                  */
6693   PetscScalar     *Aa, *Ba;
6694   MatType          rtype;
6695   Mat_SeqAIJ      *a, *b;
6696   PetscObjectState state;
6697   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6698   PetscCall(PetscCalloc1(Bnnz, &Ba));
6699   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6700   if (cstart) {
6701     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6702   }
6703 
6704   PetscCall(MatGetRootType_Private(mat, &rtype));
6705 
6706   MatSeqXAIJGetOptions_Private(mpiaij->A);
6707   PetscCall(MatDestroy(&mpiaij->A));
6708   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6709   PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat));
6710   MatSeqXAIJRestoreOptions_Private(mpiaij->A);
6711 
6712   MatSeqXAIJGetOptions_Private(mpiaij->B);
6713   PetscCall(MatDestroy(&mpiaij->B));
6714   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6715   PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat));
6716   MatSeqXAIJRestoreOptions_Private(mpiaij->B);
6717 
6718   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6719   mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ
6720   state              = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate;
6721   PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
6722 
6723   a               = (Mat_SeqAIJ *)mpiaij->A->data;
6724   b               = (Mat_SeqAIJ *)mpiaij->B->data;
6725   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6726   a->free_a = b->free_a = PETSC_TRUE;
6727   a->free_ij = b->free_ij = PETSC_TRUE;
6728 
6729   /* conversion must happen AFTER multiply setup */
6730   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6731   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6732   PetscCall(VecDestroy(&mpiaij->lvec));
6733   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6734 
6735   // Put the COO struct in a container and then attach that to the matrix
6736   PetscCall(PetscMalloc1(1, &coo));
6737   coo->n       = coo_n;
6738   coo->sf      = sf2;
6739   coo->sendlen = nleaves;
6740   coo->recvlen = nroots;
6741   coo->Annz    = Annz;
6742   coo->Bnnz    = Bnnz;
6743   coo->Annz2   = Annz2;
6744   coo->Bnnz2   = Bnnz2;
6745   coo->Atot1   = Atot1;
6746   coo->Atot2   = Atot2;
6747   coo->Btot1   = Btot1;
6748   coo->Btot2   = Btot2;
6749   coo->Ajmap1  = Ajmap1;
6750   coo->Aperm1  = Aperm1;
6751   coo->Bjmap1  = Bjmap1;
6752   coo->Bperm1  = Bperm1;
6753   coo->Aimap2  = Aimap2;
6754   coo->Ajmap2  = Ajmap2;
6755   coo->Aperm2  = Aperm2;
6756   coo->Bimap2  = Bimap2;
6757   coo->Bjmap2  = Bjmap2;
6758   coo->Bperm2  = Bperm2;
6759   coo->Cperm1  = Cperm1;
6760   // Allocate in preallocation. If not used, it has zero cost on host
6761   PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
6762   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
6763   PetscCall(PetscContainerSetPointer(container, coo));
6764   PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ));
6765   PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
6766   PetscCall(PetscContainerDestroy(&container));
6767   PetscFunctionReturn(PETSC_SUCCESS);
6768 }
6769 
6770 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6771 {
6772   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6773   Mat                  A = mpiaij->A, B = mpiaij->B;
6774   PetscScalar         *Aa, *Ba;
6775   PetscScalar         *sendbuf, *recvbuf;
6776   const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
6777   const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
6778   const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
6779   const PetscCount    *Cperm1;
6780   PetscContainer       container;
6781   MatCOOStruct_MPIAIJ *coo;
6782 
6783   PetscFunctionBegin;
6784   PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
6785   PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix");
6786   PetscCall(PetscContainerGetPointer(container, (void **)&coo));
6787   sendbuf = coo->sendbuf;
6788   recvbuf = coo->recvbuf;
6789   Ajmap1  = coo->Ajmap1;
6790   Ajmap2  = coo->Ajmap2;
6791   Aimap2  = coo->Aimap2;
6792   Bjmap1  = coo->Bjmap1;
6793   Bjmap2  = coo->Bjmap2;
6794   Bimap2  = coo->Bimap2;
6795   Aperm1  = coo->Aperm1;
6796   Aperm2  = coo->Aperm2;
6797   Bperm1  = coo->Bperm1;
6798   Bperm2  = coo->Bperm2;
6799   Cperm1  = coo->Cperm1;
6800 
6801   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6802   PetscCall(MatSeqAIJGetArray(B, &Ba));
6803 
6804   /* Pack entries to be sent to remote */
6805   for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6806 
6807   /* Send remote entries to their owner and overlap the communication with local computation */
6808   PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6809   /* Add local entries to A and B */
6810   for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6811     PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
6812     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6813     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6814   }
6815   for (PetscCount i = 0; i < coo->Bnnz; i++) {
6816     PetscScalar sum = 0.0;
6817     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6818     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6819   }
6820   PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6821 
6822   /* Add received remote entries to A and B */
6823   for (PetscCount i = 0; i < coo->Annz2; i++) {
6824     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6825   }
6826   for (PetscCount i = 0; i < coo->Bnnz2; i++) {
6827     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6828   }
6829   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6830   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6831   PetscFunctionReturn(PETSC_SUCCESS);
6832 }
6833 
6834 /*MC
6835    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6836 
6837    Options Database Keys:
6838 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6839 
6840    Level: beginner
6841 
6842    Notes:
6843    `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values,
6844     in this case the values associated with the rows and columns one passes in are set to zero
6845     in the matrix
6846 
    `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
6849 
6850 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6851 M*/
6852 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6853 {
6854   Mat_MPIAIJ *b;
6855   PetscMPIInt size;
6856 
6857   PetscFunctionBegin;
6858   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
6859 
6860   PetscCall(PetscNew(&b));
6861   B->data       = (void *)b;
6862   B->ops[0]     = MatOps_Values;
6863   B->assembled  = PETSC_FALSE;
6864   B->insertmode = NOT_SET_VALUES;
6865   b->size       = size;
6866 
6867   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
6868 
6869   /* build cache for off array entries formed */
6870   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
6871 
6872   b->donotstash  = PETSC_FALSE;
6873   b->colmap      = NULL;
6874   b->garray      = NULL;
6875   b->roworiented = PETSC_TRUE;
6876 
6877   /* stuff used for matrix vector multiply */
6878   b->lvec  = NULL;
6879   b->Mvctx = NULL;
6880 
6881   /* stuff for MatGetRow() */
6882   b->rowindices   = NULL;
6883   b->rowvalues    = NULL;
6884   b->getrowactive = PETSC_FALSE;
6885 
6886   /* flexible pointer used in CUSPARSE classes */
6887   b->spptr = NULL;
6888 
6889   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
6890   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
6891   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
6892   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
6893   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
6894   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
6895   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
6896   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
6897   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
6898   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
6899 #if defined(PETSC_HAVE_CUDA)
6900   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
6901 #endif
6902 #if defined(PETSC_HAVE_HIP)
6903   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
6904 #endif
6905 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6906   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
6907 #endif
6908 #if defined(PETSC_HAVE_MKL_SPARSE)
6909   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
6910 #endif
6911   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
6912   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
6913   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
6914   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
6915 #if defined(PETSC_HAVE_ELEMENTAL)
6916   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
6917 #endif
6918 #if defined(PETSC_HAVE_SCALAPACK)
6919   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
6920 #endif
6921   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
6922   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
6923 #if defined(PETSC_HAVE_HYPRE)
6924   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
6925   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
6926 #endif
6927   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
6928   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
6929   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
6930   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
6931   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
6932   PetscFunctionReturn(PETSC_SUCCESS);
6933 }
6934 
6935 /*@
6936   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6937   and "off-diagonal" part of the matrix in CSR format.
6938 
6939   Collective
6940 
6941   Input Parameters:
6942 + comm - MPI communicator
6943 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
6944 . n    - This value should be the same as the local size used in creating the
6945          x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have
6946          calculated if `N` is given) For square matrices `n` is almost always `m`.
6947 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
6948 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
6949 . i    - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6950 . j    - column indices, which must be local, i.e., based off the start column of the diagonal portion
6951 . a    - matrix values
6952 . oi   - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6953 . oj   - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
6954 - oa   - matrix values
6955 
6956   Output Parameter:
6957 . mat - the matrix
6958 
6959   Level: advanced
6960 
6961   Notes:
6962   The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user
6963   must free the arrays once the matrix has been destroyed and not before.
6964 
6965   The `i` and `j` indices are 0 based
6966 
6967   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix
6968 
6969   This sets local rows and cannot be used to set off-processor values.
6970 
6971   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6972   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6973   not easily support in-place reassembly. It is recommended to use `MatSetValues()` (or a variant thereof) because
6974   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6975   keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6976   communication if it is known that only local entries will be set.
6977 
6978 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6979           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6980 @*/
6981 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
6982 {
6983   Mat_MPIAIJ *maij;
6984 
6985   PetscFunctionBegin;
6986   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
6987   PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
6988   PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
6989   PetscCall(MatCreate(comm, mat));
6990   PetscCall(MatSetSizes(*mat, m, n, M, N));
6991   PetscCall(MatSetType(*mat, MATMPIAIJ));
6992   maij = (Mat_MPIAIJ *)(*mat)->data;
6993 
6994   (*mat)->preallocated = PETSC_TRUE;
6995 
6996   PetscCall(PetscLayoutSetUp((*mat)->rmap));
6997   PetscCall(PetscLayoutSetUp((*mat)->cmap));
6998 
6999   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
7000   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));
7001 
7002   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
7003   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
7004   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
7005   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
7006   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
7007   PetscFunctionReturn(PETSC_SUCCESS);
7008 }
7009 
7010 typedef struct {
7011   Mat       *mp;    /* intermediate products */
7012   PetscBool *mptmp; /* is the intermediate product temporary ? */
7013   PetscInt   cp;    /* number of intermediate products */
7014 
7015   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
7016   PetscInt    *startsj_s, *startsj_r;
7017   PetscScalar *bufa;
7018   Mat          P_oth;
7019 
7020   /* may take advantage of merging product->B */
7021   Mat Bloc; /* B-local by merging diag and off-diag */
7022 
7023   /* cusparse does not have support to split between symbolic and numeric phases.
7024      When api_user is true, we don't need to update the numerical values
7025      of the temporary storage */
7026   PetscBool reusesym;
7027 
7028   /* support for COO values insertion */
7029   PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
7030   PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
7031   PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
7032   PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
7033   PetscSF      sf;            /* used for non-local values insertion and memory malloc */
7034   PetscMemType mtype;
7035 
7036   /* customization */
7037   PetscBool abmerge;
7038   PetscBool P_oth_bind;
7039 } MatMatMPIAIJBACKEND;
7040 
7041 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
7042 {
7043   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
7044   PetscInt             i;
7045 
7046   PetscFunctionBegin;
7047   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
7048   PetscCall(PetscFree(mmdata->bufa));
7049   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
7050   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
7051   PetscCall(MatDestroy(&mmdata->P_oth));
7052   PetscCall(MatDestroy(&mmdata->Bloc));
7053   PetscCall(PetscSFDestroy(&mmdata->sf));
7054   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
7055   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
7056   PetscCall(PetscFree(mmdata->own[0]));
7057   PetscCall(PetscFree(mmdata->own));
7058   PetscCall(PetscFree(mmdata->off[0]));
7059   PetscCall(PetscFree(mmdata->off));
7060   PetscCall(PetscFree(mmdata));
7061   PetscFunctionReturn(PETSC_SUCCESS);
7062 }
7063 
7064 /* Copy selected n entries with indices in idx[] of A to v[].
7065    If idx is NULL, copy the whole data array of A to v[]
7066  */
7067 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
7068 {
7069   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
7070 
7071   PetscFunctionBegin;
7072   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7073   if (f) {
7074     PetscCall((*f)(A, n, idx, v));
7075   } else {
7076     const PetscScalar *vv;
7077 
7078     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7079     if (n && idx) {
7080       PetscScalar    *w  = v;
7081       const PetscInt *oi = idx;
7082       PetscInt        j;
7083 
7084       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7085     } else {
7086       PetscCall(PetscArraycpy(v, vv, n));
7087     }
7088     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7089   }
7090   PetscFunctionReturn(PETSC_SUCCESS);
7091 }
7092 
7093 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
7094 {
7095   MatMatMPIAIJBACKEND *mmdata;
7096   PetscInt             i, n_d, n_o;
7097 
7098   PetscFunctionBegin;
7099   MatCheckProduct(C, 1);
7100   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
7101   mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
7102   if (!mmdata->reusesym) { /* update temporary matrices */
7103     if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7104     if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
7105   }
7106   mmdata->reusesym = PETSC_FALSE;
7107 
7108   for (i = 0; i < mmdata->cp; i++) {
7109     PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
7110     PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
7111   }
7112   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
7113     PetscInt noff = mmdata->off[i + 1] - mmdata->off[i];
7114 
7115     if (mmdata->mptmp[i]) continue;
7116     if (noff) {
7117       PetscInt nown = mmdata->own[i + 1] - mmdata->own[i];
7118 
7119       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
7120       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
7121       n_o += noff;
7122       n_d += nown;
7123     } else {
7124       Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;
7125 
7126       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
7127       n_d += mm->nz;
7128     }
7129   }
7130   if (mmdata->hasoffproc) { /* offprocess insertion */
7131     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7132     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7133   }
7134   PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
7135   PetscFunctionReturn(PETSC_SUCCESS);
7136 }
7137 
7138 /* Support for Pt * A, A * P, or Pt * A * P */
7139 #define MAX_NUMBER_INTERMEDIATE 4
7140 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
7141 {
7142   Mat_Product           *product = C->product;
7143   Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
7144   Mat_MPIAIJ            *a, *p;
7145   MatMatMPIAIJBACKEND   *mmdata;
7146   ISLocalToGlobalMapping P_oth_l2g = NULL;
7147   IS                     glob      = NULL;
7148   const char            *prefix;
7149   char                   pprefix[256];
7150   const PetscInt        *globidx, *P_oth_idx;
7151   PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
7152   PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
7153   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
7154                                                                                          /* type-0: consecutive, start from 0; type-1: consecutive with */
7155                                                                                          /* a base offset; type-2: sparse with a local to global map table */
7156   const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */
7157 
7158   MatProductType ptype;
7159   PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
7160   PetscMPIInt    size;
7161 
7162   PetscFunctionBegin;
7163   MatCheckProduct(C, 1);
7164   PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
7165   ptype = product->type;
7166   if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
7167     ptype                                          = MATPRODUCT_AB;
7168     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
7169   }
7170   switch (ptype) {
7171   case MATPRODUCT_AB:
7172     A          = product->A;
7173     P          = product->B;
7174     m          = A->rmap->n;
7175     n          = P->cmap->n;
7176     M          = A->rmap->N;
7177     N          = P->cmap->N;
7178     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
7179     break;
7180   case MATPRODUCT_AtB:
7181     P          = product->A;
7182     A          = product->B;
7183     m          = P->cmap->n;
7184     n          = A->cmap->n;
7185     M          = P->cmap->N;
7186     N          = A->cmap->N;
7187     hasoffproc = PETSC_TRUE;
7188     break;
7189   case MATPRODUCT_PtAP:
7190     A          = product->A;
7191     P          = product->B;
7192     m          = P->cmap->n;
7193     n          = P->cmap->n;
7194     M          = P->cmap->N;
7195     N          = P->cmap->N;
7196     hasoffproc = PETSC_TRUE;
7197     break;
7198   default:
7199     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7200   }
7201   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
7202   if (size == 1) hasoffproc = PETSC_FALSE;
7203 
7204   /* defaults */
7205   for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
7206     mp[i]    = NULL;
7207     mptmp[i] = PETSC_FALSE;
7208     rmapt[i] = -1;
7209     cmapt[i] = -1;
7210     rmapa[i] = NULL;
7211     cmapa[i] = NULL;
7212   }
7213 
7214   /* customization */
7215   PetscCall(PetscNew(&mmdata));
7216   mmdata->reusesym = product->api_user;
7217   if (ptype == MATPRODUCT_AB) {
7218     if (product->api_user) {
7219       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
7220       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7221       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7222       PetscOptionsEnd();
7223     } else {
7224       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
7225       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7226       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7227       PetscOptionsEnd();
7228     }
7229   } else if (ptype == MATPRODUCT_PtAP) {
7230     if (product->api_user) {
7231       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
7232       PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7233       PetscOptionsEnd();
7234     } else {
7235       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
7236       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7237       PetscOptionsEnd();
7238     }
7239   }
7240   a = (Mat_MPIAIJ *)A->data;
7241   p = (Mat_MPIAIJ *)P->data;
7242   PetscCall(MatSetSizes(C, m, n, M, N));
7243   PetscCall(PetscLayoutSetUp(C->rmap));
7244   PetscCall(PetscLayoutSetUp(C->cmap));
7245   PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
7246   PetscCall(MatGetOptionsPrefix(C, &prefix));
7247 
7248   cp = 0;
7249   switch (ptype) {
7250   case MATPRODUCT_AB: /* A * P */
7251     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7252 
7253     /* A_diag * P_local (merged or not) */
7254     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7255       /* P is product->B */
7256       PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7257       PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7258       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7259       PetscCall(MatProductSetFill(mp[cp], product->fill));
7260       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7261       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7262       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7263       mp[cp]->product->api_user = product->api_user;
7264       PetscCall(MatProductSetFromOptions(mp[cp]));
7265       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7266       PetscCall(ISGetIndices(glob, &globidx));
7267       rmapt[cp] = 1;
7268       cmapt[cp] = 2;
7269       cmapa[cp] = globidx;
7270       mptmp[cp] = PETSC_FALSE;
7271       cp++;
7272     } else { /* A_diag * P_diag and A_diag * P_off */
7273       PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
7274       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7275       PetscCall(MatProductSetFill(mp[cp], product->fill));
7276       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7277       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7278       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7279       mp[cp]->product->api_user = product->api_user;
7280       PetscCall(MatProductSetFromOptions(mp[cp]));
7281       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7282       rmapt[cp] = 1;
7283       cmapt[cp] = 1;
7284       mptmp[cp] = PETSC_FALSE;
7285       cp++;
7286       PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
7287       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7288       PetscCall(MatProductSetFill(mp[cp], product->fill));
7289       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7290       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7291       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7292       mp[cp]->product->api_user = product->api_user;
7293       PetscCall(MatProductSetFromOptions(mp[cp]));
7294       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7295       rmapt[cp] = 1;
7296       cmapt[cp] = 2;
7297       cmapa[cp] = p->garray;
7298       mptmp[cp] = PETSC_FALSE;
7299       cp++;
7300     }
7301 
7302     /* A_off * P_other */
7303     if (mmdata->P_oth) {
7304       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
7305       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7306       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7307       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7308       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7309       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7310       PetscCall(MatProductSetFill(mp[cp], product->fill));
7311       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7312       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7313       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7314       mp[cp]->product->api_user = product->api_user;
7315       PetscCall(MatProductSetFromOptions(mp[cp]));
7316       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7317       rmapt[cp] = 1;
7318       cmapt[cp] = 2;
7319       cmapa[cp] = P_oth_idx;
7320       mptmp[cp] = PETSC_FALSE;
7321       cp++;
7322     }
7323     break;
7324 
7325   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7326     /* A is product->B */
7327     PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7328     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7329       PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
7330       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7331       PetscCall(MatProductSetFill(mp[cp], product->fill));
7332       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7333       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7334       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7335       mp[cp]->product->api_user = product->api_user;
7336       PetscCall(MatProductSetFromOptions(mp[cp]));
7337       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7338       PetscCall(ISGetIndices(glob, &globidx));
7339       rmapt[cp] = 2;
7340       rmapa[cp] = globidx;
7341       cmapt[cp] = 2;
7342       cmapa[cp] = globidx;
7343       mptmp[cp] = PETSC_FALSE;
7344       cp++;
7345     } else {
7346       PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
7347       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7348       PetscCall(MatProductSetFill(mp[cp], product->fill));
7349       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7350       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7351       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7352       mp[cp]->product->api_user = product->api_user;
7353       PetscCall(MatProductSetFromOptions(mp[cp]));
7354       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7355       PetscCall(ISGetIndices(glob, &globidx));
7356       rmapt[cp] = 1;
7357       cmapt[cp] = 2;
7358       cmapa[cp] = globidx;
7359       mptmp[cp] = PETSC_FALSE;
7360       cp++;
7361       PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
7362       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7363       PetscCall(MatProductSetFill(mp[cp], product->fill));
7364       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7365       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7366       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7367       mp[cp]->product->api_user = product->api_user;
7368       PetscCall(MatProductSetFromOptions(mp[cp]));
7369       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7370       rmapt[cp] = 2;
7371       rmapa[cp] = p->garray;
7372       cmapt[cp] = 2;
7373       cmapa[cp] = globidx;
7374       mptmp[cp] = PETSC_FALSE;
7375       cp++;
7376     }
7377     break;
7378   case MATPRODUCT_PtAP:
7379     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7380     /* P is product->B */
7381     PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7382     PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7383     PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
7384     PetscCall(MatProductSetFill(mp[cp], product->fill));
7385     PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7386     PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7387     PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7388     mp[cp]->product->api_user = product->api_user;
7389     PetscCall(MatProductSetFromOptions(mp[cp]));
7390     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7391     PetscCall(ISGetIndices(glob, &globidx));
7392     rmapt[cp] = 2;
7393     rmapa[cp] = globidx;
7394     cmapt[cp] = 2;
7395     cmapa[cp] = globidx;
7396     mptmp[cp] = PETSC_FALSE;
7397     cp++;
7398     if (mmdata->P_oth) {
7399       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
7400       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7401       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7402       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7403       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7404       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7405       PetscCall(MatProductSetFill(mp[cp], product->fill));
7406       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7407       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7408       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7409       mp[cp]->product->api_user = product->api_user;
7410       PetscCall(MatProductSetFromOptions(mp[cp]));
7411       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7412       mptmp[cp] = PETSC_TRUE;
7413       cp++;
7414       PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
7415       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7416       PetscCall(MatProductSetFill(mp[cp], product->fill));
7417       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7418       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7419       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7420       mp[cp]->product->api_user = product->api_user;
7421       PetscCall(MatProductSetFromOptions(mp[cp]));
7422       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7423       rmapt[cp] = 2;
7424       rmapa[cp] = globidx;
7425       cmapt[cp] = 2;
7426       cmapa[cp] = P_oth_idx;
7427       mptmp[cp] = PETSC_FALSE;
7428       cp++;
7429     }
7430     break;
7431   default:
7432     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7433   }
7434   /* sanity check */
7435   if (size > 1)
7436     for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);
7437 
7438   PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
7439   for (i = 0; i < cp; i++) {
7440     mmdata->mp[i]    = mp[i];
7441     mmdata->mptmp[i] = mptmp[i];
7442   }
7443   mmdata->cp             = cp;
7444   C->product->data       = mmdata;
7445   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7446   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7447 
7448   /* memory type */
7449   mmdata->mtype = PETSC_MEMTYPE_HOST;
7450   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
7451   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
7452   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
7453   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7454   else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
7455   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7456 
7457   /* prepare coo coordinates for values insertion */
7458 
7459   /* count total nonzeros of those intermediate seqaij Mats
7460     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7461     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7462     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7463   */
7464   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7465     Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
7466     if (mptmp[cp]) continue;
7467     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
7468       const PetscInt *rmap = rmapa[cp];
7469       const PetscInt  mr   = mp[cp]->rmap->n;
7470       const PetscInt  rs   = C->rmap->rstart;
7471       const PetscInt  re   = C->rmap->rend;
7472       const PetscInt *ii   = mm->i;
7473       for (i = 0; i < mr; i++) {
7474         const PetscInt gr = rmap[i];
7475         const PetscInt nz = ii[i + 1] - ii[i];
7476         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7477         else ncoo_oown += nz;                  /* this row is local */
7478       }
7479     } else ncoo_d += mm->nz;
7480   }
7481 
7482   /*
7483     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7484 
7485     ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.
7486 
7487     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].
7488 
7489     off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
7490     own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
7491     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7492 
7493     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7494     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
7495   */
7496   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
7497   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));
7498 
7499   /* gather (i,j) of nonzeros inserted by remote procs */
7500   if (hasoffproc) {
7501     PetscSF  msf;
7502     PetscInt ncoo2, *coo_i2, *coo_j2;
7503 
7504     PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
7505     PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
7506     PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */
7507 
7508     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7509       Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
7510       PetscInt   *idxoff = mmdata->off[cp];
7511       PetscInt   *idxown = mmdata->own[cp];
7512       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7513         const PetscInt *rmap = rmapa[cp];
7514         const PetscInt *cmap = cmapa[cp];
7515         const PetscInt *ii   = mm->i;
7516         PetscInt       *coi  = coo_i + ncoo_o;
7517         PetscInt       *coj  = coo_j + ncoo_o;
7518         const PetscInt  mr   = mp[cp]->rmap->n;
7519         const PetscInt  rs   = C->rmap->rstart;
7520         const PetscInt  re   = C->rmap->rend;
7521         const PetscInt  cs   = C->cmap->rstart;
7522         for (i = 0; i < mr; i++) {
7523           const PetscInt *jj = mm->j + ii[i];
7524           const PetscInt  gr = rmap[i];
7525           const PetscInt  nz = ii[i + 1] - ii[i];
7526           if (gr < rs || gr >= re) { /* this is an offproc row */
7527             for (j = ii[i]; j < ii[i + 1]; j++) {
7528               *coi++    = gr;
7529               *idxoff++ = j;
7530             }
7531             if (!cmapt[cp]) { /* already global */
7532               for (j = 0; j < nz; j++) *coj++ = jj[j];
7533             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7534               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7535             } else { /* offdiag */
7536               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7537             }
7538             ncoo_o += nz;
7539           } else { /* this is a local row */
7540             for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
7541           }
7542         }
7543       }
7544       mmdata->off[cp + 1] = idxoff;
7545       mmdata->own[cp + 1] = idxown;
7546     }
7547 
7548     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7549     PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
7550     PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
7551     PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
7552     ncoo = ncoo_d + ncoo_oown + ncoo2;
7553     PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
7554     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7555     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
7556     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7557     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7558     PetscCall(PetscFree2(coo_i, coo_j));
7559     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7560     PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
7561     coo_i = coo_i2;
7562     coo_j = coo_j2;
7563   } else { /* no offproc values insertion */
7564     ncoo = ncoo_d;
7565     PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));
7566 
7567     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7568     PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
7569     PetscCall(PetscSFSetUp(mmdata->sf));
7570   }
7571   mmdata->hasoffproc = hasoffproc;
7572 
7573   /* gather (i,j) of nonzeros inserted locally */
7574   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7575     Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
7576     PetscInt       *coi  = coo_i + ncoo_d;
7577     PetscInt       *coj  = coo_j + ncoo_d;
7578     const PetscInt *jj   = mm->j;
7579     const PetscInt *ii   = mm->i;
7580     const PetscInt *cmap = cmapa[cp];
7581     const PetscInt *rmap = rmapa[cp];
7582     const PetscInt  mr   = mp[cp]->rmap->n;
7583     const PetscInt  rs   = C->rmap->rstart;
7584     const PetscInt  re   = C->rmap->rend;
7585     const PetscInt  cs   = C->cmap->rstart;
7586 
7587     if (mptmp[cp]) continue;
7588     if (rmapt[cp] == 1) { /* consecutive rows */
7589       /* fill coo_i */
7590       for (i = 0; i < mr; i++) {
7591         const PetscInt gr = i + rs;
7592         for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
7593       }
7594       /* fill coo_j */
7595       if (!cmapt[cp]) { /* type-0, already global */
7596         PetscCall(PetscArraycpy(coj, jj, mm->nz));
7597       } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
7598         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7599       } else {                                            /* type-2, local to global for sparse columns */
7600         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7601       }
7602       ncoo_d += mm->nz;
7603     } else if (rmapt[cp] == 2) { /* sparse rows */
7604       for (i = 0; i < mr; i++) {
7605         const PetscInt *jj = mm->j + ii[i];
7606         const PetscInt  gr = rmap[i];
7607         const PetscInt  nz = ii[i + 1] - ii[i];
7608         if (gr >= rs && gr < re) { /* local rows */
7609           for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
7610           if (!cmapt[cp]) { /* type-0, already global */
7611             for (j = 0; j < nz; j++) *coj++ = jj[j];
7612           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7613             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7614           } else { /* type-2, local to global for sparse columns */
7615             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7616           }
7617           ncoo_d += nz;
7618         }
7619       }
7620     }
7621   }
7622   if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
7623   PetscCall(ISDestroy(&glob));
7624   if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
7625   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7626   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7627   PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));
7628 
7629   /* preallocate with COO data */
7630   PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
7631   PetscCall(PetscFree2(coo_i, coo_j));
7632   PetscFunctionReturn(PETSC_SUCCESS);
7633 }
7634 
7635 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7636 {
7637   Mat_Product *product = mat->product;
7638 #if defined(PETSC_HAVE_DEVICE)
7639   PetscBool match  = PETSC_FALSE;
7640   PetscBool usecpu = PETSC_FALSE;
7641 #else
7642   PetscBool match = PETSC_TRUE;
7643 #endif
7644 
7645   PetscFunctionBegin;
7646   MatCheckProduct(mat, 1);
7647 #if defined(PETSC_HAVE_DEVICE)
7648   if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
7649   if (match) { /* we can always fallback to the CPU if requested */
7650     switch (product->type) {
7651     case MATPRODUCT_AB:
7652       if (product->api_user) {
7653         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
7654         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7655         PetscOptionsEnd();
7656       } else {
7657         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
7658         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7659         PetscOptionsEnd();
7660       }
7661       break;
7662     case MATPRODUCT_AtB:
7663       if (product->api_user) {
7664         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
7665         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7666         PetscOptionsEnd();
7667       } else {
7668         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
7669         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7670         PetscOptionsEnd();
7671       }
7672       break;
7673     case MATPRODUCT_PtAP:
7674       if (product->api_user) {
7675         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
7676         PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7677         PetscOptionsEnd();
7678       } else {
7679         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
7680         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7681         PetscOptionsEnd();
7682       }
7683       break;
7684     default:
7685       break;
7686     }
7687     match = (PetscBool)!usecpu;
7688   }
7689 #endif
7690   if (match) {
7691     switch (product->type) {
7692     case MATPRODUCT_AB:
7693     case MATPRODUCT_AtB:
7694     case MATPRODUCT_PtAP:
7695       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7696       break;
7697     default:
7698       break;
7699     }
7700   }
7701   /* fallback to MPIAIJ ops */
7702   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7703   PetscFunctionReturn(PETSC_SUCCESS);
7704 }
7705 
7706 /*
7707    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7708 
7709    n - the number of block indices in cc[]
7710    cc - the block indices (must be large enough to contain the indices)
7711 */
7712 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7713 {
7714   PetscInt        cnt = -1, nidx, j;
7715   const PetscInt *idx;
7716 
7717   PetscFunctionBegin;
7718   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7719   if (nidx) {
7720     cnt     = 0;
7721     cc[cnt] = idx[0] / bs;
7722     for (j = 1; j < nidx; j++) {
7723       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7724     }
7725   }
7726   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7727   *n = cnt + 1;
7728   PetscFunctionReturn(PETSC_SUCCESS);
7729 }
7730 
7731 /*
7732     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7733 
7734     ncollapsed - the number of block indices
7735     collapsed - the block indices (must be large enough to contain the indices)
7736 */
7737 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7738 {
7739   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7740 
7741   PetscFunctionBegin;
7742   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7743   for (i = start + 1; i < start + bs; i++) {
7744     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7745     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7746     cprevtmp = cprev;
7747     cprev    = merged;
7748     merged   = cprevtmp;
7749   }
7750   *ncollapsed = nprev;
7751   if (collapsed) *collapsed = cprev;
7752   PetscFunctionReturn(PETSC_SUCCESS);
7753 }
7754 
7755 /*
7756  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7757 
7758  Input Parameter:
7759  . Amat - matrix
7760  - symmetrize - make the result symmetric
7761  + scale - scale with diagonal
7762 
7763  Output Parameter:
7764  . a_Gmat - output scalar graph >= 0
7765 
7766 */
7767 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat)
7768 {
7769   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7770   MPI_Comm  comm;
7771   Mat       Gmat;
7772   PetscBool ismpiaij, isseqaij;
7773   Mat       a, b, c;
7774   MatType   jtype;
7775 
7776   PetscFunctionBegin;
7777   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7778   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7779   PetscCall(MatGetSize(Amat, &MM, &NN));
7780   PetscCall(MatGetBlockSize(Amat, &bs));
7781   nloc = (Iend - Istart) / bs;
7782 
7783   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7784   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7785   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7786 
7787   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7788   /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
7789      implementation */
7790   if (bs > 1) {
7791     PetscCall(MatGetType(Amat, &jtype));
7792     PetscCall(MatCreate(comm, &Gmat));
7793     PetscCall(MatSetType(Gmat, jtype));
7794     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7795     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7796     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7797       PetscInt  *d_nnz, *o_nnz;
7798       MatScalar *aa, val, *AA;
7799       PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
7800       if (isseqaij) {
7801         a = Amat;
7802         b = NULL;
7803       } else {
7804         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7805         a             = d->A;
7806         b             = d->B;
7807       }
7808       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7809       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7810       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7811         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
7812         const PetscInt *cols1, *cols2;
7813         for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
7814           PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
7815           nnz[brow / bs] = nc2 / bs;
7816           if (nc2 % bs) ok = 0;
7817           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
7818           for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
7819             PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
7820             if (nc1 != nc2) ok = 0;
7821             else {
7822               for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
7823                 if (cols1[jj] != cols2[jj]) ok = 0;
7824                 if (cols1[jj] % bs != jj % bs) ok = 0;
7825               }
7826             }
7827             PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
7828           }
7829           PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
7830           if (!ok) {
7831             PetscCall(PetscFree2(d_nnz, o_nnz));
7832             PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
7833             goto old_bs;
7834           }
7835         }
7836       }
7837       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7838       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7839       PetscCall(PetscFree2(d_nnz, o_nnz));
7840       PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
7841       // diag
7842       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
7843         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
7844         ai               = aseq->i;
7845         n                = ai[brow + 1] - ai[brow];
7846         aj               = aseq->j + ai[brow];
7847         for (int k = 0; k < n; k += bs) {        // block columns
7848           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
7849           val        = 0;
7850           if (index_size == 0) {
7851             for (int ii = 0; ii < bs; ii++) { // rows in block
7852               aa = aseq->a + ai[brow + ii] + k;
7853               for (int jj = 0; jj < bs; jj++) {         // columns in block
7854                 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7855               }
7856             }
7857           } else {                                       // use (index,index) value if provided
7858             for (int iii = 0; iii < index_size; iii++) { // rows in block
7859               int ii = index[iii];
7860               aa     = aseq->a + ai[brow + ii] + k;
7861               for (int jjj = 0; jjj < index_size; jjj++) { // columns in block
7862                 int jj = index[jjj];
7863                 val += PetscAbs(PetscRealPart(aa[jj]));
7864               }
7865             }
7866           }
7867           PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7868           AA[k / bs] = val;
7869         }
7870         grow = Istart / bs + brow / bs;
7871         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES));
7872       }
7873       // off-diag
7874       if (ismpiaij) {
7875         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
7876         const PetscScalar *vals;
7877         const PetscInt    *cols, *garray = aij->garray;
7878         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
7879         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
7880           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
7881           for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
7882             PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
7883             AA[k / bs] = 0;
7884             AJ[cidx]   = garray[cols[k]] / bs;
7885           }
7886           nc = ncols / bs;
7887           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
7888           if (index_size == 0) {
7889             for (int ii = 0; ii < bs; ii++) { // rows in block
7890               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7891               for (int k = 0; k < ncols; k += bs) {
7892                 for (int jj = 0; jj < bs; jj++) { // cols in block
7893                   PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7894                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7895                 }
7896               }
7897               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7898             }
7899           } else {                                       // use (index,index) value if provided
7900             for (int iii = 0; iii < index_size; iii++) { // rows in block
7901               int ii = index[iii];
7902               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7903               for (int k = 0; k < ncols; k += bs) {
7904                 for (int jjj = 0; jjj < index_size; jjj++) { // cols in block
7905                   int jj = index[jjj];
7906                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7907                 }
7908               }
7909               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7910             }
7911           }
7912           grow = Istart / bs + brow / bs;
7913           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES));
7914         }
7915       }
7916       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7917       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7918       PetscCall(PetscFree2(AA, AJ));
7919     } else {
7920       const PetscScalar *vals;
7921       const PetscInt    *idx;
7922       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
7923     old_bs:
7924       /*
7925        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7926        */
7927       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
7928       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7929       if (isseqaij) {
7930         PetscInt max_d_nnz;
7931         /*
7932          Determine exact preallocation count for (sequential) scalar matrix
7933          */
7934         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
7935         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7936         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7937         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7938         PetscCall(PetscFree3(w0, w1, w2));
7939       } else if (ismpiaij) {
7940         Mat             Daij, Oaij;
7941         const PetscInt *garray;
7942         PetscInt        max_d_nnz;
7943         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
7944         /*
7945          Determine exact preallocation count for diagonal block portion of scalar matrix
7946          */
7947         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
7948         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7949         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7950         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7951         PetscCall(PetscFree3(w0, w1, w2));
7952         /*
7953          Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
7954          */
7955         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7956           o_nnz[jj] = 0;
7957           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
7958             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7959             o_nnz[jj] += ncols;
7960             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7961           }
7962           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
7963         }
7964       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
7965       /* get scalar copy (norms) of matrix */
7966       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7967       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7968       PetscCall(PetscFree2(d_nnz, o_nnz));
7969       for (Ii = Istart; Ii < Iend; Ii++) {
7970         PetscInt dest_row = Ii / bs;
7971         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
7972         for (jj = 0; jj < ncols; jj++) {
7973           PetscInt    dest_col = idx[jj] / bs;
7974           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
7975           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
7976         }
7977         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
7978       }
7979       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7980       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7981     }
7982   } else {
7983     if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
7984     else {
7985       Gmat = Amat;
7986       PetscCall(PetscObjectReference((PetscObject)Gmat));
7987     }
7988     if (isseqaij) {
7989       a = Gmat;
7990       b = NULL;
7991     } else {
7992       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
7993       a             = d->A;
7994       b             = d->B;
7995     }
7996     if (filter >= 0 || scale) {
7997       /* take absolute value of each entry */
7998       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7999         MatInfo      info;
8000         PetscScalar *avals;
8001         PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
8002         PetscCall(MatSeqAIJGetArray(c, &avals));
8003         for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
8004         PetscCall(MatSeqAIJRestoreArray(c, &avals));
8005       }
8006     }
8007   }
8008   if (symmetrize) {
8009     PetscBool isset, issym;
8010     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
8011     if (!isset || !issym) {
8012       Mat matTrans;
8013       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
8014       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
8015       PetscCall(MatDestroy(&matTrans));
8016     }
8017     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
8018   } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
8019   if (scale) {
8020     /* scale c for all diagonal values = 1 or -1 */
8021     Vec diag;
8022     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
8023     PetscCall(MatGetDiagonal(Gmat, diag));
8024     PetscCall(VecReciprocal(diag));
8025     PetscCall(VecSqrtAbs(diag));
8026     PetscCall(MatDiagonalScale(Gmat, diag, diag));
8027     PetscCall(VecDestroy(&diag));
8028   }
8029   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
8030 
8031   if (filter >= 0) {
8032     PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
8033     PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
8034   }
8035   *a_Gmat = Gmat;
8036   PetscFunctionReturn(PETSC_SUCCESS);
8037 }
8038 
/*
    Special version for direct calls from Fortran
*/

/*
   PetscCall and SETERRQ are redefined here so that they can be used inside a function returning void:
   rather than returning an error code they store the value returned by PetscError() in *_ierr and then
   execute a bare return. A variable named _ierr of type PetscErrorCode* must therefore be in scope
   wherever these macros are expanded.
*/
/* Identical to PetscCallVoid, except it assigns to *_ierr */
#undef PetscCall
#define PetscCall(...) \
  do { \
    PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
    if (PetscUnlikely(ierr_msv_mpiaij)) { \
      *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
      return; \
    } \
  } while (0)

/* Raises a new error (PETSC_ERROR_INITIAL) through PetscError(), stores the result in *_ierr and returns */
#undef SETERRQ
#define SETERRQ(comm, ierr, ...) \
  do { \
    *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
    return; \
  } while (0)
8061 
/*
   Select the external symbol name of the Fortran stub: upper case when PETSC_HAVE_FORTRAN_CAPS is
   defined, no trailing underscore when PETSC_HAVE_FORTRAN_UNDERSCORE is not defined; in the remaining
   case the name matsetvaluesmpiaij_ is used as written.
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
  #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
  #define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif
8068 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
8069 {
8070   Mat         mat = *mmat;
8071   PetscInt    m = *mm, n = *mn;
8072   InsertMode  addv = *maddv;
8073   Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
8074   PetscScalar value;
8075 
8076   MatCheckPreallocated(mat, 1);
8077   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
8078   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
8079   {
8080     PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
8081     PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
8082     PetscBool roworiented = aij->roworiented;
8083 
8084     /* Some Variables required in the macro */
8085     Mat         A     = aij->A;
8086     Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
8087     PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
8088     MatScalar  *aa;
8089     PetscBool   ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
8090     Mat         B                 = aij->B;
8091     Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
8092     PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
8093     MatScalar  *ba;
8094     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
8095      * cannot use "#if defined" inside a macro. */
8096     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
8097 
8098     PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
8099     PetscInt   nonew = a->nonew;
8100     MatScalar *ap1, *ap2;
8101 
8102     PetscFunctionBegin;
8103     PetscCall(MatSeqAIJGetArray(A, &aa));
8104     PetscCall(MatSeqAIJGetArray(B, &ba));
8105     for (i = 0; i < m; i++) {
8106       if (im[i] < 0) continue;
8107       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
8108       if (im[i] >= rstart && im[i] < rend) {
8109         row      = im[i] - rstart;
8110         lastcol1 = -1;
8111         rp1      = aj + ai[row];
8112         ap1      = aa + ai[row];
8113         rmax1    = aimax[row];
8114         nrow1    = ailen[row];
8115         low1     = 0;
8116         high1    = nrow1;
8117         lastcol2 = -1;
8118         rp2      = bj + bi[row];
8119         ap2      = ba + bi[row];
8120         rmax2    = bimax[row];
8121         nrow2    = bilen[row];
8122         low2     = 0;
8123         high2    = nrow2;
8124 
8125         for (j = 0; j < n; j++) {
8126           if (roworiented) value = v[i * n + j];
8127           else value = v[i + j * m];
8128           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
8129           if (in[j] >= cstart && in[j] < cend) {
8130             col = in[j] - cstart;
8131             MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
8132           } else if (in[j] < 0) continue;
8133           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
8134             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
8135           } else {
8136             if (mat->was_assembled) {
8137               if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
8138 #if defined(PETSC_USE_CTABLE)
8139               PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
8140               col--;
8141 #else
8142               col = aij->colmap[in[j]] - 1;
8143 #endif
8144               if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) {
8145                 PetscCall(MatDisAssemble_MPIAIJ(mat));
8146                 col = in[j];
8147                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
8148                 B        = aij->B;
8149                 b        = (Mat_SeqAIJ *)B->data;
8150                 bimax    = b->imax;
8151                 bi       = b->i;
8152                 bilen    = b->ilen;
8153                 bj       = b->j;
8154                 rp2      = bj + bi[row];
8155                 ap2      = ba + bi[row];
8156                 rmax2    = bimax[row];
8157                 nrow2    = bilen[row];
8158                 low2     = 0;
8159                 high2    = nrow2;
8160                 bm       = aij->B->rmap->n;
8161                 ba       = b->a;
8162                 inserted = PETSC_FALSE;
8163               }
8164             } else col = in[j];
8165             MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
8166           }
8167         }
8168       } else if (!aij->donotstash) {
8169         if (roworiented) {
8170           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8171         } else {
8172           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8173         }
8174       }
8175     }
8176     PetscCall(MatSeqAIJRestoreArray(A, &aa));
8177     PetscCall(MatSeqAIJRestoreArray(B, &ba));
8178   }
8179   PetscFunctionReturnVoid();
8180 }
8181 
/* Undefine PetscCall and SETERRQ here, since they were redefined from their original definitions
 * above for use in the void Fortran stub. No other PETSc functions should be defined past this
 * point, as it is impossible to recover the original definitions. */
#undef PetscCall
#undef SETERRQ
8187