xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision b2e8165f902b29d33cb5c6fcd6da9ceca759ca7a)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
10 {
11   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
12 
13   PetscFunctionBegin;
14   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
15   PetscCall(MatStashDestroy_Private(&mat->stash));
16   PetscCall(VecDestroy(&aij->diag));
17   PetscCall(MatDestroy(&aij->A));
18   PetscCall(MatDestroy(&aij->B));
19 #if defined(PETSC_USE_CTABLE)
20   PetscCall(PetscHMapIDestroy(&aij->colmap));
21 #else
22   PetscCall(PetscFree(aij->colmap));
23 #endif
24   PetscCall(PetscFree(aij->garray));
25   PetscCall(VecDestroy(&aij->lvec));
26   PetscCall(VecScatterDestroy(&aij->Mvctx));
27   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
28   PetscCall(PetscFree(aij->ld));
29 
30   PetscCall(PetscFree(mat->data));
31 
32   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
33   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
34 
35   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
36   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
37   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
38   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
39   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
40   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
41   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
42   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
43   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
44   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
45 #if defined(PETSC_HAVE_CUDA)
46   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
47 #endif
48 #if defined(PETSC_HAVE_HIP)
49   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
50 #endif
51 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
52   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
53 #endif
54   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
55 #if defined(PETSC_HAVE_ELEMENTAL)
56   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
57 #endif
58 #if defined(PETSC_HAVE_SCALAPACK)
59   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
60 #endif
61 #if defined(PETSC_HAVE_HYPRE)
62   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
63   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
64 #endif
65   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
66   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
69   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
71 #if defined(PETSC_HAVE_MKL_SPARSE)
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
73 #endif
74   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
75   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
76   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
77   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
78   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
79   PetscFunctionReturn(PETSC_SUCCESS);
80 }
81 
82 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and  MatAssemblyEnd_MPI_Hash() */
83 #define TYPE AIJ
84 #define TYPE_AIJ
85 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
86 #undef TYPE
87 #undef TYPE_AIJ
88 
89 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
90 {
91   Mat B;
92 
93   PetscFunctionBegin;
94   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
95   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
96   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
97   PetscCall(MatDestroy(&B));
98   PetscFunctionReturn(PETSC_SUCCESS);
99 }
100 
101 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
102 {
103   Mat B;
104 
105   PetscFunctionBegin;
106   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
107   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
108   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
109   PetscFunctionReturn(PETSC_SUCCESS);
110 }
111 
/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`.
   The type also automatically switches over to use inodes when enough exist.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
M*/
132 
/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aijcrl - sets the matrix type to `MATAIJCRL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/
149 
150 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
151 {
152   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
153 
154   PetscFunctionBegin;
155 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
156   A->boundtocpu = flg;
157 #endif
158   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
159   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
160 
161   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
162    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
163    * to differ from the parent matrix. */
164   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
165   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
166   PetscFunctionReturn(PETSC_SUCCESS);
167 }
168 
169 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
170 {
171   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
172 
173   PetscFunctionBegin;
174   if (mat->A) {
175     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
176     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
177   }
178   PetscFunctionReturn(PETSC_SUCCESS);
179 }
180 
181 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
182 {
183   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
184   Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
185   Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
186   const PetscInt  *ia, *ib;
187   const MatScalar *aa, *bb, *aav, *bav;
188   PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
189   PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;
190 
191   PetscFunctionBegin;
192   *keptrows = NULL;
193 
194   ia = a->i;
195   ib = b->i;
196   PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
197   PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
198   for (i = 0; i < m; i++) {
199     na = ia[i + 1] - ia[i];
200     nb = ib[i + 1] - ib[i];
201     if (!na && !nb) {
202       cnt++;
203       goto ok1;
204     }
205     aa = aav + ia[i];
206     for (j = 0; j < na; j++) {
207       if (aa[j] != 0.0) goto ok1;
208     }
209     bb = PetscSafePointerPlusOffset(bav, ib[i]);
210     for (j = 0; j < nb; j++) {
211       if (bb[j] != 0.0) goto ok1;
212     }
213     cnt++;
214   ok1:;
215   }
216   PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
217   if (!n0rows) {
218     PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
219     PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
220     PetscFunctionReturn(PETSC_SUCCESS);
221   }
222   PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
223   cnt = 0;
224   for (i = 0; i < m; i++) {
225     na = ia[i + 1] - ia[i];
226     nb = ib[i + 1] - ib[i];
227     if (!na && !nb) continue;
228     aa = aav + ia[i];
229     for (j = 0; j < na; j++) {
230       if (aa[j] != 0.0) {
231         rows[cnt++] = rstart + i;
232         goto ok2;
233       }
234     }
235     bb = PetscSafePointerPlusOffset(bav, ib[i]);
236     for (j = 0; j < nb; j++) {
237       if (bb[j] != 0.0) {
238         rows[cnt++] = rstart + i;
239         goto ok2;
240       }
241     }
242   ok2:;
243   }
244   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
245   PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
246   PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
247   PetscFunctionReturn(PETSC_SUCCESS);
248 }
249 
250 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
251 {
252   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
253   PetscBool   cong;
254 
255   PetscFunctionBegin;
256   PetscCall(MatHasCongruentLayouts(Y, &cong));
257   if (Y->assembled && cong) {
258     PetscCall(MatDiagonalSet(aij->A, D, is));
259   } else {
260     PetscCall(MatDiagonalSet_Default(Y, D, is));
261   }
262   PetscFunctionReturn(PETSC_SUCCESS);
263 }
264 
265 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
266 {
267   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
268   PetscInt    i, rstart, nrows, *rows;
269 
270   PetscFunctionBegin;
271   *zrows = NULL;
272   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
273   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
274   for (i = 0; i < nrows; i++) rows[i] += rstart;
275   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
276   PetscFunctionReturn(PETSC_SUCCESS);
277 }
278 
279 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
280 {
281   Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
282   PetscInt           i, m, n, *garray = aij->garray;
283   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
284   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
285   PetscReal         *work;
286   const PetscScalar *dummy;
287 
288   PetscFunctionBegin;
289   PetscCall(MatGetSize(A, &m, &n));
290   PetscCall(PetscCalloc1(n, &work));
291   PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
292   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
293   PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
294   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
295   if (type == NORM_2) {
296     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
297     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
298   } else if (type == NORM_1) {
299     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
300     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
301   } else if (type == NORM_INFINITY) {
302     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
303     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
304   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
305     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
306     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
307   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
308     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
309     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
310   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
311   if (type == NORM_INFINITY) {
312     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
313   } else {
314     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
315   }
316   PetscCall(PetscFree(work));
317   if (type == NORM_2) {
318     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
319   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
320     for (i = 0; i < n; i++) reductions[i] /= m;
321   }
322   PetscFunctionReturn(PETSC_SUCCESS);
323 }
324 
325 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
326 {
327   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
328   IS              sis, gis;
329   const PetscInt *isis, *igis;
330   PetscInt        n, *iis, nsis, ngis, rstart, i;
331 
332   PetscFunctionBegin;
333   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
334   PetscCall(MatFindNonzeroRows(a->B, &gis));
335   PetscCall(ISGetSize(gis, &ngis));
336   PetscCall(ISGetSize(sis, &nsis));
337   PetscCall(ISGetIndices(sis, &isis));
338   PetscCall(ISGetIndices(gis, &igis));
339 
340   PetscCall(PetscMalloc1(ngis + nsis, &iis));
341   PetscCall(PetscArraycpy(iis, igis, ngis));
342   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
343   n = ngis + nsis;
344   PetscCall(PetscSortRemoveDupsInt(&n, iis));
345   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
346   for (i = 0; i < n; i++) iis[i] += rstart;
347   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
348 
349   PetscCall(ISRestoreIndices(sis, &isis));
350   PetscCall(ISRestoreIndices(gis, &igis));
351   PetscCall(ISDestroy(&sis));
352   PetscCall(ISDestroy(&gis));
353   PetscFunctionReturn(PETSC_SUCCESS);
354 }
355 
356 /*
357   Local utility routine that creates a mapping from the global column
358 number to the local number in the off-diagonal part of the local
359 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
360 a slightly higher hash table cost; without it it is not scalable (each processor
361 has an order N integer array but is fast to access.
362 */
363 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
364 {
365   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
366   PetscInt    n   = aij->B->cmap->n, i;
367 
368   PetscFunctionBegin;
369   PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
370 #if defined(PETSC_USE_CTABLE)
371   PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
372   for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
373 #else
374   PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
375   for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
376 #endif
377   PetscFunctionReturn(PETSC_SUCCESS);
378 }
379 
/*
  MatSetValues_SeqAIJ_A_Private - Inserts or adds one value into a row of the diagonal block.

  The macro is not self-contained. It reads and updates these names from the enclosing scope:
  A, a, am, aa, ai, aj, aimax, ailen, rp1, ap1, rmax1, nrow1, low1, high1, lastcol1, t, _i, N,
  nonew and ignorezeroentries, and it defines the label a_noinsert, so it can be expanded only
  once per function.

  row/col are local indices; orow/ocol are used only in the error message.
  The search window [low1, high1) is carried over between consecutive calls on the same row.
*/
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    /* restart from the left only when the column does not increase */ \
    if (col <= lastcol1) { \
      low1 = 0; \
    } else { \
      high1 = nrow1; \
    } \
    lastcol1 = col; \
    /* bisect until the window holds at most five entries */ \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) { \
        high1 = t; \
      } else { \
        low1 = t; \
      } \
    } \
    /* linear scan of the remaining window */ \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure whether LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else { \
          ap1[_i] = value; \
        } \
        goto a_noinsert; \
      } \
    } \
    /* location is new: skip ignorable off-diagonal zeros and silently-dropped new nonzeros */ \
    if ((value == 0.0 && ignorezeroentries && row != col) || (nonew == 1)) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1 - 1; \
    nrow1++; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)
424 
/*
  MatSetValues_SeqAIJ_B_Private - Inserts or adds one value into a row of the off-diagonal block.

  The macro is not self-contained. It reads and updates these names from the enclosing scope:
  B, b, bm, ba, bi, bj, bimax, bilen, rp2, ap2, rmax2, nrow2, low2, high2, lastcol2, t, _i, N,
  nonew and ignorezeroentries, and it defines the label b_noinsert, so it can be expanded only
  once per function.

  Unlike the diagonal-block macro, a zero value is skipped under ignorezeroentries
  without any row != col exception. orow/ocol are used only in the error message.
*/
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    /* restart from the left only when the column does not increase */ \
    if (col <= lastcol2) { \
      low2 = 0; \
    } else { \
      high2 = nrow2; \
    } \
    lastcol2 = col; \
    /* bisect until the window holds at most five entries */ \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) { \
        high2 = t; \
      } else { \
        low2 = t; \
      } \
    } \
    /* linear scan of the remaining window */ \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else { \
          ap2[_i] = value; \
        } \
        goto b_noinsert; \
      } \
    } \
    /* location is new: skip ignorable zeros and silently-dropped new nonzeros */ \
    if ((value == 0.0 && ignorezeroentries) || (nonew == 1)) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2 - 1; \
    nrow2++; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  } while (0)
468 
469 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
470 {
471   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
472   Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
473   PetscInt     l, *garray                         = mat->garray, diag;
474   PetscScalar *aa, *ba;
475 
476   PetscFunctionBegin;
477   /* code only works for square matrices A */
478 
479   /* find size of row to the left of the diagonal part */
480   PetscCall(MatGetOwnershipRange(A, &diag, NULL));
481   row = row - diag;
482   for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
483     if (garray[b->j[b->i[row] + l]] > diag) break;
484   }
485   if (l) {
486     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
487     PetscCall(PetscArraycpy(ba + b->i[row], v, l));
488     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
489   }
490 
491   /* diagonal part */
492   if (a->i[row + 1] - a->i[row]) {
493     PetscCall(MatSeqAIJGetArray(mat->A, &aa));
494     PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
495     PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
496   }
497 
498   /* right of diagonal part */
499   if (b->i[row + 1] - b->i[row] - l) {
500     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
501     PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
502     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
503   }
504   PetscFunctionReturn(PETSC_SUCCESS);
505 }
506 
507 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
508 {
509   Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
510   PetscScalar value = 0.0;
511   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
512   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
513   PetscBool   roworiented = aij->roworiented;
514 
515   /* Some Variables required in the macro */
516   Mat         A     = aij->A;
517   Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
518   PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
519   PetscBool   ignorezeroentries = a->ignorezeroentries;
520   Mat         B                 = aij->B;
521   Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
522   PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
523   MatScalar  *aa, *ba;
524   PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
525   PetscInt    nonew;
526   MatScalar  *ap1, *ap2;
527 
528   PetscFunctionBegin;
529   PetscCall(MatSeqAIJGetArray(A, &aa));
530   PetscCall(MatSeqAIJGetArray(B, &ba));
531   for (i = 0; i < m; i++) {
532     if (im[i] < 0) continue;
533     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
534     if (im[i] >= rstart && im[i] < rend) {
535       row      = im[i] - rstart;
536       lastcol1 = -1;
537       rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
538       ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
539       rmax1    = aimax[row];
540       nrow1    = ailen[row];
541       low1     = 0;
542       high1    = nrow1;
543       lastcol2 = -1;
544       rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
545       ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
546       rmax2    = bimax[row];
547       nrow2    = bilen[row];
548       low2     = 0;
549       high2    = nrow2;
550 
551       for (j = 0; j < n; j++) {
552         if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
553         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
554         if (in[j] >= cstart && in[j] < cend) {
555           col   = in[j] - cstart;
556           nonew = a->nonew;
557           MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
558         } else if (in[j] < 0) {
559           continue;
560         } else {
561           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
562           if (mat->was_assembled) {
563             if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
564 #if defined(PETSC_USE_CTABLE)
565             PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
566             col--;
567 #else
568             col = aij->colmap[in[j]] - 1;
569 #endif
570             if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
571               PetscCall(MatDisAssemble_MPIAIJ(mat));               /* Change aij->B from reduced/local format to expanded/global format */
572               col = in[j];
573               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
574               B     = aij->B;
575               b     = (Mat_SeqAIJ *)B->data;
576               bimax = b->imax;
577               bi    = b->i;
578               bilen = b->ilen;
579               bj    = b->j;
580               ba    = b->a;
581               rp2   = PetscSafePointerPlusOffset(bj, bi[row]);
582               ap2   = PetscSafePointerPlusOffset(ba, bi[row]);
583               rmax2 = bimax[row];
584               nrow2 = bilen[row];
585               low2  = 0;
586               high2 = nrow2;
587               bm    = aij->B->rmap->n;
588               ba    = b->a;
589             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
590               if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) {
591                 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
592               } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
593             }
594           } else col = in[j];
595           nonew = b->nonew;
596           MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
597         }
598       }
599     } else {
600       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
601       if (!aij->donotstash) {
602         mat->assembled = PETSC_FALSE;
603         if (roworiented) {
604           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
605         } else {
606           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
607         }
608       }
609     }
610   }
611   PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
612   PetscCall(MatSeqAIJRestoreArray(B, &ba));
613   PetscFunctionReturn(PETSC_SUCCESS);
614 }
615 
616 /*
617     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
618     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
619     No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
620 */
621 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
622 {
623   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
624   Mat         A      = aij->A; /* diagonal part of the matrix */
625   Mat         B      = aij->B; /* off-diagonal part of the matrix */
626   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
627   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
628   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
629   PetscInt   *ailen = a->ilen, *aj = a->j;
630   PetscInt   *bilen = b->ilen, *bj = b->j;
631   PetscInt    am          = aij->A->rmap->n, j;
632   PetscInt    diag_so_far = 0, dnz;
633   PetscInt    offd_so_far = 0, onz;
634 
635   PetscFunctionBegin;
636   /* Iterate over all rows of the matrix */
637   for (j = 0; j < am; j++) {
638     dnz = onz = 0;
639     /*  Iterate over all non-zero columns of the current row */
640     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
641       /* If column is in the diagonal */
642       if (mat_j[col] >= cstart && mat_j[col] < cend) {
643         aj[diag_so_far++] = mat_j[col] - cstart;
644         dnz++;
645       } else { /* off-diagonal entries */
646         bj[offd_so_far++] = mat_j[col];
647         onz++;
648       }
649     }
650     ailen[j] = dnz;
651     bilen[j] = onz;
652   }
653   PetscFunctionReturn(PETSC_SUCCESS);
654 }
655 
656 /*
657     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
658     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
659     No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
660     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
661     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
662 */
663 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
664 {
665   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
666   Mat          A    = aij->A; /* diagonal part of the matrix */
667   Mat          B    = aij->B; /* off-diagonal part of the matrix */
668   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
669   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
670   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
671   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
672   PetscInt    *ailen = a->ilen, *aj = a->j;
673   PetscInt    *bilen = b->ilen, *bj = b->j;
674   PetscInt     am          = aij->A->rmap->n, j;
675   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
676   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
677   PetscScalar *aa = a->a, *ba = b->a;
678 
679   PetscFunctionBegin;
680   /* Iterate over all rows of the matrix */
681   for (j = 0; j < am; j++) {
682     dnz_row = onz_row = 0;
683     rowstart_offd     = full_offd_i[j];
684     rowstart_diag     = full_diag_i[j];
685     /*  Iterate over all non-zero columns of the current row */
686     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
687       /* If column is in the diagonal */
688       if (mat_j[col] >= cstart && mat_j[col] < cend) {
689         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
690         aa[rowstart_diag + dnz_row] = mat_a[col];
691         dnz_row++;
692       } else { /* off-diagonal entries */
693         bj[rowstart_offd + onz_row] = mat_j[col];
694         ba[rowstart_offd + onz_row] = mat_a[col];
695         onz_row++;
696       }
697     }
698     ailen[j] = dnz_row;
699     bilen[j] = onz_row;
700   }
701   PetscFunctionReturn(PETSC_SUCCESS);
702 }
703 
704 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
705 {
706   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
707   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
708   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
709 
710   PetscFunctionBegin;
711   for (i = 0; i < m; i++) {
712     if (idxm[i] < 0) continue; /* negative row */
713     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
714     PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
715     row = idxm[i] - rstart;
716     for (j = 0; j < n; j++) {
717       if (idxn[j] < 0) continue; /* negative column */
718       PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
719       if (idxn[j] >= cstart && idxn[j] < cend) {
720         col = idxn[j] - cstart;
721         PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
722       } else {
723         if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
724 #if defined(PETSC_USE_CTABLE)
725         PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
726         col--;
727 #else
728         col = aij->colmap[idxn[j]] - 1;
729 #endif
730         if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
731         else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
732       }
733     }
734   }
735   PetscFunctionReturn(PETSC_SUCCESS);
736 }
737 
738 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
739 {
740   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
741   PetscInt    nstash, reallocs;
742 
743   PetscFunctionBegin;
744   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
745 
746   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
747   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
748   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
749   PetscFunctionReturn(PETSC_SUCCESS);
750 }
751 
752 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
753 {
754   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
755   PetscMPIInt  n;
756   PetscInt     i, j, rstart, ncols, flg;
757   PetscInt    *row, *col;
758   PetscBool    other_disassembled;
759   PetscScalar *val;
760 
761   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
762 
763   PetscFunctionBegin;
764   if (!aij->donotstash && !mat->nooffprocentries) {
765     while (1) {
766       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
767       if (!flg) break;
768 
769       for (i = 0; i < n;) {
770         /* Now identify the consecutive vals belonging to the same row */
771         for (j = i, rstart = row[j]; j < n; j++) {
772           if (row[j] != rstart) break;
773         }
774         if (j < n) ncols = j - i;
775         else ncols = n - i;
776         /* Now assemble all these values with a single function call */
777         PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
778         i = j;
779       }
780     }
781     PetscCall(MatStashScatterEnd_Private(&mat->stash));
782   }
783 #if defined(PETSC_HAVE_DEVICE)
784   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
785   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
786   if (mat->boundtocpu) {
787     PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
788     PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
789   }
790 #endif
791   PetscCall(MatAssemblyBegin(aij->A, mode));
792   PetscCall(MatAssemblyEnd(aij->A, mode));
793 
794   /* determine if any processor has disassembled, if so we must
795      also disassemble ourself, in order that we may reassemble. */
796   /*
797      if nonzero structure of submatrix B cannot change then we know that
798      no processor disassembled thus we can skip this stuff
799   */
800   if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
801     PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
802     if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
803       PetscCall(MatDisAssemble_MPIAIJ(mat));
804     }
805   }
806   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
807   PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
808 #if defined(PETSC_HAVE_DEVICE)
809   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
810 #endif
811   PetscCall(MatAssemblyBegin(aij->B, mode));
812   PetscCall(MatAssemblyEnd(aij->B, mode));
813 
814   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
815 
816   aij->rowvalues = NULL;
817 
818   PetscCall(VecDestroy(&aij->diag));
819 
820   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
821   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
822     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
823     PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
824   }
825 #if defined(PETSC_HAVE_DEVICE)
826   mat->offloadmask = PETSC_OFFLOAD_BOTH;
827 #endif
828   PetscFunctionReturn(PETSC_SUCCESS);
829 }
830 
831 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
832 {
833   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
834 
835   PetscFunctionBegin;
836   PetscCall(MatZeroEntries(l->A));
837   PetscCall(MatZeroEntries(l->B));
838   PetscFunctionReturn(PETSC_SUCCESS);
839 }
840 
841 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
842 {
843   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
844   PetscInt   *lrows;
845   PetscInt    r, len;
846   PetscBool   cong;
847 
848   PetscFunctionBegin;
849   /* get locally owned rows */
850   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
851   PetscCall(MatHasCongruentLayouts(A, &cong));
852   /* fix right-hand side if needed */
853   if (x && b) {
854     const PetscScalar *xx;
855     PetscScalar       *bb;
856 
857     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
858     PetscCall(VecGetArrayRead(x, &xx));
859     PetscCall(VecGetArray(b, &bb));
860     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
861     PetscCall(VecRestoreArrayRead(x, &xx));
862     PetscCall(VecRestoreArray(b, &bb));
863   }
864 
865   if (diag != 0.0 && cong) {
866     PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
867     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
868   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
869     Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
870     Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
871     PetscInt    nnwA, nnwB;
872     PetscBool   nnzA, nnzB;
873 
874     nnwA = aijA->nonew;
875     nnwB = aijB->nonew;
876     nnzA = aijA->keepnonzeropattern;
877     nnzB = aijB->keepnonzeropattern;
878     if (!nnzA) {
879       PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
880       aijA->nonew = 0;
881     }
882     if (!nnzB) {
883       PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
884       aijB->nonew = 0;
885     }
886     /* Must zero here before the next loop */
887     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
888     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
889     for (r = 0; r < len; ++r) {
890       const PetscInt row = lrows[r] + A->rmap->rstart;
891       if (row >= A->cmap->N) continue;
892       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
893     }
894     aijA->nonew = nnwA;
895     aijB->nonew = nnwB;
896   } else {
897     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
898     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
899   }
900   PetscCall(PetscFree(lrows));
901   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
902   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
903 
904   /* only change matrix nonzero state if pattern was allowed to be changed */
905   if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) {
906     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
907     PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
908   }
909   PetscFunctionReturn(PETSC_SUCCESS);
910 }
911 
912 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
913 {
914   Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
915   PetscMPIInt        n = A->rmap->n;
916   PetscInt           i, j, r, m, len = 0;
917   PetscInt          *lrows, *owners = A->rmap->range;
918   PetscMPIInt        p = 0;
919   PetscSFNode       *rrows;
920   PetscSF            sf;
921   const PetscScalar *xx;
922   PetscScalar       *bb, *mask, *aij_a;
923   Vec                xmask, lmask;
924   Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
925   const PetscInt    *aj, *ii, *ridx;
926   PetscScalar       *aa;
927 
928   PetscFunctionBegin;
929   /* Create SF where leaves are input rows and roots are owned rows */
930   PetscCall(PetscMalloc1(n, &lrows));
931   for (r = 0; r < n; ++r) lrows[r] = -1;
932   PetscCall(PetscMalloc1(N, &rrows));
933   for (r = 0; r < N; ++r) {
934     const PetscInt idx = rows[r];
935     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
936     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
937       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
938     }
939     rrows[r].rank  = p;
940     rrows[r].index = rows[r] - owners[p];
941   }
942   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
943   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
944   /* Collect flags for rows to be zeroed */
945   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
946   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
947   PetscCall(PetscSFDestroy(&sf));
948   /* Compress and put in row numbers */
949   for (r = 0; r < n; ++r)
950     if (lrows[r] >= 0) lrows[len++] = r;
951   /* zero diagonal part of matrix */
952   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
953   /* handle off-diagonal part of matrix */
954   PetscCall(MatCreateVecs(A, &xmask, NULL));
955   PetscCall(VecDuplicate(l->lvec, &lmask));
956   PetscCall(VecGetArray(xmask, &bb));
957   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
958   PetscCall(VecRestoreArray(xmask, &bb));
959   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
960   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
961   PetscCall(VecDestroy(&xmask));
962   if (x && b) { /* this code is buggy when the row and column layout don't match */
963     PetscBool cong;
964 
965     PetscCall(MatHasCongruentLayouts(A, &cong));
966     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
967     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
968     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
969     PetscCall(VecGetArrayRead(l->lvec, &xx));
970     PetscCall(VecGetArray(b, &bb));
971   }
972   PetscCall(VecGetArray(lmask, &mask));
973   /* remove zeroed rows of off-diagonal matrix */
974   PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
975   ii = aij->i;
976   for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
977   /* loop over all elements of off process part of matrix zeroing removed columns*/
978   if (aij->compressedrow.use) {
979     m    = aij->compressedrow.nrows;
980     ii   = aij->compressedrow.i;
981     ridx = aij->compressedrow.rindex;
982     for (i = 0; i < m; i++) {
983       n  = ii[i + 1] - ii[i];
984       aj = aij->j + ii[i];
985       aa = aij_a + ii[i];
986 
987       for (j = 0; j < n; j++) {
988         if (PetscAbsScalar(mask[*aj])) {
989           if (b) bb[*ridx] -= *aa * xx[*aj];
990           *aa = 0.0;
991         }
992         aa++;
993         aj++;
994       }
995       ridx++;
996     }
997   } else { /* do not use compressed row format */
998     m = l->B->rmap->n;
999     for (i = 0; i < m; i++) {
1000       n  = ii[i + 1] - ii[i];
1001       aj = aij->j + ii[i];
1002       aa = aij_a + ii[i];
1003       for (j = 0; j < n; j++) {
1004         if (PetscAbsScalar(mask[*aj])) {
1005           if (b) bb[i] -= *aa * xx[*aj];
1006           *aa = 0.0;
1007         }
1008         aa++;
1009         aj++;
1010       }
1011     }
1012   }
1013   if (x && b) {
1014     PetscCall(VecRestoreArray(b, &bb));
1015     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1016   }
1017   PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
1018   PetscCall(VecRestoreArray(lmask, &mask));
1019   PetscCall(VecDestroy(&lmask));
1020   PetscCall(PetscFree(lrows));
1021 
1022   /* only change matrix nonzero state if pattern was allowed to be changed */
1023   if (!((Mat_SeqAIJ *)l->A->data)->nonew) {
1024     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1025     PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1026   }
1027   PetscFunctionReturn(PETSC_SUCCESS);
1028 }
1029 
1030 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1031 {
1032   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1033   PetscInt    nt;
1034   VecScatter  Mvctx = a->Mvctx;
1035 
1036   PetscFunctionBegin;
1037   PetscCall(VecGetLocalSize(xx, &nt));
1038   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
1039   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1040   PetscUseTypeMethod(a->A, mult, xx, yy);
1041   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1042   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1043   PetscFunctionReturn(PETSC_SUCCESS);
1044 }
1045 
1046 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1047 {
1048   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1049 
1050   PetscFunctionBegin;
1051   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1052   PetscFunctionReturn(PETSC_SUCCESS);
1053 }
1054 
1055 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1056 {
1057   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1058   VecScatter  Mvctx = a->Mvctx;
1059 
1060   PetscFunctionBegin;
1061   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1062   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1063   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1064   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1065   PetscFunctionReturn(PETSC_SUCCESS);
1066 }
1067 
1068 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1069 {
1070   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1071 
1072   PetscFunctionBegin;
1073   /* do nondiagonal part */
1074   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1075   /* do local part */
1076   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1077   /* add partial results together */
1078   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1079   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1080   PetscFunctionReturn(PETSC_SUCCESS);
1081 }
1082 
1083 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
1084 {
1085   MPI_Comm    comm;
1086   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
1087   Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
1088   IS          Me, Notme;
1089   PetscInt    M, N, first, last, *notme, i;
1090   PetscBool   lf;
1091   PetscMPIInt size;
1092 
1093   PetscFunctionBegin;
1094   /* Easy test: symmetric diagonal block */
1095   PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
1096   PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
1097   if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
1098   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
1099   PetscCallMPI(MPI_Comm_size(comm, &size));
1100   if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);
1101 
1102   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1103   PetscCall(MatGetSize(Amat, &M, &N));
1104   PetscCall(MatGetOwnershipRange(Amat, &first, &last));
1105   PetscCall(PetscMalloc1(N - last + first, &notme));
1106   for (i = 0; i < first; i++) notme[i] = i;
1107   for (i = last; i < M; i++) notme[i - last + first] = i;
1108   PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
1109   PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
1110   PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
1111   Aoff = Aoffs[0];
1112   PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
1113   Boff = Boffs[0];
1114   PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
1115   PetscCall(MatDestroyMatrices(1, &Aoffs));
1116   PetscCall(MatDestroyMatrices(1, &Boffs));
1117   PetscCall(ISDestroy(&Me));
1118   PetscCall(ISDestroy(&Notme));
1119   PetscCall(PetscFree(notme));
1120   PetscFunctionReturn(PETSC_SUCCESS);
1121 }
1122 
1123 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1124 {
1125   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1126 
1127   PetscFunctionBegin;
1128   /* do nondiagonal part */
1129   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1130   /* do local part */
1131   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1132   /* add partial results together */
1133   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1134   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1135   PetscFunctionReturn(PETSC_SUCCESS);
1136 }
1137 
1138 /*
1139   This only works correctly for square matrices where the subblock A->A is the
1140    diagonal block
1141 */
1142 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1143 {
1144   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1145 
1146   PetscFunctionBegin;
1147   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1148   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1149   PetscCall(MatGetDiagonal(a->A, v));
1150   PetscFunctionReturn(PETSC_SUCCESS);
1151 }
1152 
1153 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1154 {
1155   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1156 
1157   PetscFunctionBegin;
1158   PetscCall(MatScale(a->A, aa));
1159   PetscCall(MatScale(a->B, aa));
1160   PetscFunctionReturn(PETSC_SUCCESS);
1161 }
1162 
1163 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1164 {
1165   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1166   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1167   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1168   const PetscInt    *garray = aij->garray;
1169   const PetscScalar *aa, *ba;
1170   PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
1171   PetscInt64         nz, hnz;
1172   PetscInt          *rowlens;
1173   PetscInt          *colidxs;
1174   PetscScalar       *matvals;
1175   PetscMPIInt        rank;
1176 
1177   PetscFunctionBegin;
1178   PetscCall(PetscViewerSetUp(viewer));
1179 
1180   M  = mat->rmap->N;
1181   N  = mat->cmap->N;
1182   m  = mat->rmap->n;
1183   rs = mat->rmap->rstart;
1184   cs = mat->cmap->rstart;
1185   nz = A->nz + B->nz;
1186 
1187   /* write matrix header */
1188   header[0] = MAT_FILE_CLASSID;
1189   header[1] = M;
1190   header[2] = N;
1191   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1192   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1193   if (rank == 0) {
1194     if (hnz > PETSC_MAX_INT) header[3] = PETSC_MAX_INT;
1195     else header[3] = (PetscInt)hnz;
1196   }
1197   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1198 
1199   /* fill in and store row lengths  */
1200   PetscCall(PetscMalloc1(m, &rowlens));
1201   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1202   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1203   PetscCall(PetscFree(rowlens));
1204 
1205   /* fill in and store column indices */
1206   PetscCall(PetscMalloc1(nz, &colidxs));
1207   for (cnt = 0, i = 0; i < m; i++) {
1208     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1209       if (garray[B->j[jb]] > cs) break;
1210       colidxs[cnt++] = garray[B->j[jb]];
1211     }
1212     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1213     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1214   }
1215   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1216   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1217   PetscCall(PetscFree(colidxs));
1218 
1219   /* fill in and store nonzero values */
1220   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1221   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1222   PetscCall(PetscMalloc1(nz, &matvals));
1223   for (cnt = 0, i = 0; i < m; i++) {
1224     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1225       if (garray[B->j[jb]] > cs) break;
1226       matvals[cnt++] = ba[jb];
1227     }
1228     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1229     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1230   }
1231   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1232   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1233   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1234   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1235   PetscCall(PetscFree(matvals));
1236 
1237   /* write block size option to the viewer's .info file */
1238   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1239   PetscFunctionReturn(PETSC_SUCCESS);
1240 }
1241 
1242 #include <petscdraw.h>
1243 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1244 {
1245   Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
1246   PetscMPIInt       rank = aij->rank, size = aij->size;
1247   PetscBool         isdraw, iascii, isbinary;
1248   PetscViewer       sviewer;
1249   PetscViewerFormat format;
1250 
1251   PetscFunctionBegin;
1252   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1253   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1254   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1255   if (iascii) {
1256     PetscCall(PetscViewerGetFormat(viewer, &format));
1257     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1258       PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz;
1259       PetscCall(PetscMalloc1(size, &nz));
1260       PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
1261       for (i = 0; i < (PetscInt)size; i++) {
1262         nmax = PetscMax(nmax, nz[i]);
1263         nmin = PetscMin(nmin, nz[i]);
1264         navg += nz[i];
1265       }
1266       PetscCall(PetscFree(nz));
1267       navg = navg / size;
1268       PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
1269       PetscFunctionReturn(PETSC_SUCCESS);
1270     }
1271     PetscCall(PetscViewerGetFormat(viewer, &format));
1272     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1273       MatInfo   info;
1274       PetscInt *inodes = NULL;
1275 
1276       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1277       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
1278       PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
1279       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1280       if (!inodes) {
1281         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1282                                                      (double)info.memory));
1283       } else {
1284         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1285                                                      (double)info.memory));
1286       }
1287       PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
1288       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1289       PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
1290       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1291       PetscCall(PetscViewerFlush(viewer));
1292       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1293       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
1294       PetscCall(VecScatterView(aij->Mvctx, viewer));
1295       PetscFunctionReturn(PETSC_SUCCESS);
1296     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1297       PetscInt inodecount, inodelimit, *inodes;
1298       PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
1299       if (inodes) {
1300         PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
1301       } else {
1302         PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
1303       }
1304       PetscFunctionReturn(PETSC_SUCCESS);
1305     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1306       PetscFunctionReturn(PETSC_SUCCESS);
1307     }
1308   } else if (isbinary) {
1309     if (size == 1) {
1310       PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1311       PetscCall(MatView(aij->A, viewer));
1312     } else {
1313       PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
1314     }
1315     PetscFunctionReturn(PETSC_SUCCESS);
1316   } else if (iascii && size == 1) {
1317     PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1318     PetscCall(MatView(aij->A, viewer));
1319     PetscFunctionReturn(PETSC_SUCCESS);
1320   } else if (isdraw) {
1321     PetscDraw draw;
1322     PetscBool isnull;
1323     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1324     PetscCall(PetscDrawIsNull(draw, &isnull));
1325     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1326   }
1327 
1328   { /* assemble the entire matrix onto first processor */
1329     Mat A = NULL, Av;
1330     IS  isrow, iscol;
1331 
1332     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
1333     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
1334     PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
1335     PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
1336     /*  The commented code uses MatCreateSubMatrices instead */
1337     /*
1338     Mat *AA, A = NULL, Av;
1339     IS  isrow,iscol;
1340 
1341     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
1342     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
1343     PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
1344     if (rank == 0) {
1345        PetscCall(PetscObjectReference((PetscObject)AA[0]));
1346        A    = AA[0];
1347        Av   = AA[0];
1348     }
1349     PetscCall(MatDestroySubMatrices(1,&AA));
1350 */
1351     PetscCall(ISDestroy(&iscol));
1352     PetscCall(ISDestroy(&isrow));
1353     /*
1354        Everyone has to call to draw the matrix since the graphics waits are
1355        synchronized across all processors that share the PetscDraw object
1356     */
1357     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1358     if (rank == 0) {
1359       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
1360       PetscCall(MatView_SeqAIJ(Av, sviewer));
1361     }
1362     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1363     PetscCall(MatDestroy(&A));
1364   }
1365   PetscFunctionReturn(PETSC_SUCCESS);
1366 }
1367 
1368 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1369 {
1370   PetscBool iascii, isdraw, issocket, isbinary;
1371 
1372   PetscFunctionBegin;
1373   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1374   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1375   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1376   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1377   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1378   PetscFunctionReturn(PETSC_SUCCESS);
1379 }
1380 
1381 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
1382 {
1383   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
1384   Vec         bb1 = NULL;
1385   PetscBool   hasop;
1386 
1387   PetscFunctionBegin;
1388   if (flag == SOR_APPLY_UPPER) {
1389     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1390     PetscFunctionReturn(PETSC_SUCCESS);
1391   }
1392 
1393   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));
1394 
1395   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1396     if (flag & SOR_ZERO_INITIAL_GUESS) {
1397       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1398       its--;
1399     }
1400 
1401     while (its--) {
1402       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1403       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1404 
1405       /* update rhs: bb1 = bb - B*x */
1406       PetscCall(VecScale(mat->lvec, -1.0));
1407       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1408 
1409       /* local sweep */
1410       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
1411     }
1412   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1413     if (flag & SOR_ZERO_INITIAL_GUESS) {
1414       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1415       its--;
1416     }
1417     while (its--) {
1418       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1419       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1420 
1421       /* update rhs: bb1 = bb - B*x */
1422       PetscCall(VecScale(mat->lvec, -1.0));
1423       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1424 
1425       /* local sweep */
1426       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
1427     }
1428   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1429     if (flag & SOR_ZERO_INITIAL_GUESS) {
1430       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1431       its--;
1432     }
1433     while (its--) {
1434       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1435       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1436 
1437       /* update rhs: bb1 = bb - B*x */
1438       PetscCall(VecScale(mat->lvec, -1.0));
1439       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1440 
1441       /* local sweep */
1442       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
1443     }
1444   } else if (flag & SOR_EISENSTAT) {
1445     Vec xx1;
1446 
1447     PetscCall(VecDuplicate(bb, &xx1));
1448     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
1449 
1450     PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1451     PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1452     if (!mat->diag) {
1453       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
1454       PetscCall(MatGetDiagonal(matin, mat->diag));
1455     }
1456     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
1457     if (hasop) {
1458       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
1459     } else {
1460       PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
1461     }
1462     PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));
1463 
1464     PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));
1465 
1466     /* local sweep */
1467     PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
1468     PetscCall(VecAXPY(xx, 1.0, xx1));
1469     PetscCall(VecDestroy(&xx1));
1470   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");
1471 
1472   PetscCall(VecDestroy(&bb1));
1473 
1474   matin->factorerrortype = mat->A->factorerrortype;
1475   PetscFunctionReturn(PETSC_SUCCESS);
1476 }
1477 
1478 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
1479 {
1480   Mat             aA, aB, Aperm;
1481   const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
1482   PetscScalar    *aa, *ba;
1483   PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
1484   PetscSF         rowsf, sf;
1485   IS              parcolp = NULL;
1486   PetscBool       done;
1487 
1488   PetscFunctionBegin;
1489   PetscCall(MatGetLocalSize(A, &m, &n));
1490   PetscCall(ISGetIndices(rowp, &rwant));
1491   PetscCall(ISGetIndices(colp, &cwant));
1492   PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));
1493 
1494   /* Invert row permutation to find out where my rows should go */
1495   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
1496   PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
1497   PetscCall(PetscSFSetFromOptions(rowsf));
1498   for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
1499   PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1500   PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1501 
1502   /* Invert column permutation to find out where my columns should go */
1503   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1504   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
1505   PetscCall(PetscSFSetFromOptions(sf));
1506   for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
1507   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1508   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1509   PetscCall(PetscSFDestroy(&sf));
1510 
1511   PetscCall(ISRestoreIndices(rowp, &rwant));
1512   PetscCall(ISRestoreIndices(colp, &cwant));
1513   PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));
1514 
1515   /* Find out where my gcols should go */
1516   PetscCall(MatGetSize(aB, NULL, &ng));
1517   PetscCall(PetscMalloc1(ng, &gcdest));
1518   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1519   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
1520   PetscCall(PetscSFSetFromOptions(sf));
1521   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1522   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1523   PetscCall(PetscSFDestroy(&sf));
1524 
1525   PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
1526   PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1527   PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1528   for (i = 0; i < m; i++) {
1529     PetscInt    row = rdest[i];
1530     PetscMPIInt rowner;
1531     PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
1532     for (j = ai[i]; j < ai[i + 1]; j++) {
1533       PetscInt    col = cdest[aj[j]];
1534       PetscMPIInt cowner;
1535       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
1536       if (rowner == cowner) dnnz[i]++;
1537       else onnz[i]++;
1538     }
1539     for (j = bi[i]; j < bi[i + 1]; j++) {
1540       PetscInt    col = gcdest[bj[j]];
1541       PetscMPIInt cowner;
1542       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
1543       if (rowner == cowner) dnnz[i]++;
1544       else onnz[i]++;
1545     }
1546   }
1547   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1548   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1549   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1550   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1551   PetscCall(PetscSFDestroy(&rowsf));
1552 
1553   PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
1554   PetscCall(MatSeqAIJGetArray(aA, &aa));
1555   PetscCall(MatSeqAIJGetArray(aB, &ba));
1556   for (i = 0; i < m; i++) {
1557     PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
1558     PetscInt  j0, rowlen;
1559     rowlen = ai[i + 1] - ai[i];
1560     for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
1561       for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
1562       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
1563     }
1564     rowlen = bi[i + 1] - bi[i];
1565     for (j0 = j = 0; j < rowlen; j0 = j) {
1566       for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
1567       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
1568     }
1569   }
1570   PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
1571   PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
1572   PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1573   PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1574   PetscCall(MatSeqAIJRestoreArray(aA, &aa));
1575   PetscCall(MatSeqAIJRestoreArray(aB, &ba));
1576   PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
1577   PetscCall(PetscFree3(work, rdest, cdest));
1578   PetscCall(PetscFree(gcdest));
1579   if (parcolp) PetscCall(ISDestroy(&colp));
1580   *B = Aperm;
1581   PetscFunctionReturn(PETSC_SUCCESS);
1582 }
1583 
1584 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1585 {
1586   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1587 
1588   PetscFunctionBegin;
1589   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1590   if (ghosts) *ghosts = aij->garray;
1591   PetscFunctionReturn(PETSC_SUCCESS);
1592 }
1593 
1594 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1595 {
1596   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1597   Mat            A = mat->A, B = mat->B;
1598   PetscLogDouble isend[5], irecv[5];
1599 
1600   PetscFunctionBegin;
1601   info->block_size = 1.0;
1602   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1603 
1604   isend[0] = info->nz_used;
1605   isend[1] = info->nz_allocated;
1606   isend[2] = info->nz_unneeded;
1607   isend[3] = info->memory;
1608   isend[4] = info->mallocs;
1609 
1610   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1611 
1612   isend[0] += info->nz_used;
1613   isend[1] += info->nz_allocated;
1614   isend[2] += info->nz_unneeded;
1615   isend[3] += info->memory;
1616   isend[4] += info->mallocs;
1617   if (flag == MAT_LOCAL) {
1618     info->nz_used      = isend[0];
1619     info->nz_allocated = isend[1];
1620     info->nz_unneeded  = isend[2];
1621     info->memory       = isend[3];
1622     info->mallocs      = isend[4];
1623   } else if (flag == MAT_GLOBAL_MAX) {
1624     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1625 
1626     info->nz_used      = irecv[0];
1627     info->nz_allocated = irecv[1];
1628     info->nz_unneeded  = irecv[2];
1629     info->memory       = irecv[3];
1630     info->mallocs      = irecv[4];
1631   } else if (flag == MAT_GLOBAL_SUM) {
1632     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1633 
1634     info->nz_used      = irecv[0];
1635     info->nz_allocated = irecv[1];
1636     info->nz_unneeded  = irecv[2];
1637     info->memory       = irecv[3];
1638     info->mallocs      = irecv[4];
1639   }
1640   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1641   info->fill_ratio_needed = 0;
1642   info->factor_mallocs    = 0;
1643   PetscFunctionReturn(PETSC_SUCCESS);
1644 }
1645 
1646 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
1647 {
1648   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1649 
1650   PetscFunctionBegin;
1651   switch (op) {
1652   case MAT_NEW_NONZERO_LOCATIONS:
1653   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1654   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1655   case MAT_KEEP_NONZERO_PATTERN:
1656   case MAT_NEW_NONZERO_LOCATION_ERR:
1657   case MAT_USE_INODES:
1658   case MAT_IGNORE_ZERO_ENTRIES:
1659   case MAT_FORM_EXPLICIT_TRANSPOSE:
1660     MatCheckPreallocated(A, 1);
1661     PetscCall(MatSetOption(a->A, op, flg));
1662     PetscCall(MatSetOption(a->B, op, flg));
1663     break;
1664   case MAT_ROW_ORIENTED:
1665     MatCheckPreallocated(A, 1);
1666     a->roworiented = flg;
1667 
1668     PetscCall(MatSetOption(a->A, op, flg));
1669     PetscCall(MatSetOption(a->B, op, flg));
1670     break;
1671   case MAT_FORCE_DIAGONAL_ENTRIES:
1672   case MAT_SORTED_FULL:
1673     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1674     break;
1675   case MAT_IGNORE_OFF_PROC_ENTRIES:
1676     a->donotstash = flg;
1677     break;
1678   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1679   case MAT_SPD:
1680   case MAT_SYMMETRIC:
1681   case MAT_STRUCTURALLY_SYMMETRIC:
1682   case MAT_HERMITIAN:
1683   case MAT_SYMMETRY_ETERNAL:
1684   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1685   case MAT_SPD_ETERNAL:
1686     /* if the diagonal matrix is square it inherits some of the properties above */
1687     break;
1688   case MAT_SUBMAT_SINGLEIS:
1689     A->submat_singleis = flg;
1690     break;
1691   case MAT_STRUCTURE_ONLY:
1692     /* The option is handled directly by MatSetOption() */
1693     break;
1694   default:
1695     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1696   }
1697   PetscFunctionReturn(PETSC_SUCCESS);
1698 }
1699 
1700 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1701 {
1702   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
1703   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1704   PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
1705   PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
1706   PetscInt    *cmap, *idx_p;
1707 
1708   PetscFunctionBegin;
1709   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1710   mat->getrowactive = PETSC_TRUE;
1711 
1712   if (!mat->rowvalues && (idx || v)) {
1713     /*
1714         allocate enough space to hold information from the longest row.
1715     */
1716     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
1717     PetscInt    max = 1, tmp;
1718     for (i = 0; i < matin->rmap->n; i++) {
1719       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1720       if (max < tmp) max = tmp;
1721     }
1722     PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
1723   }
1724 
1725   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1726   lrow = row - rstart;
1727 
1728   pvA = &vworkA;
1729   pcA = &cworkA;
1730   pvB = &vworkB;
1731   pcB = &cworkB;
1732   if (!v) {
1733     pvA = NULL;
1734     pvB = NULL;
1735   }
1736   if (!idx) {
1737     pcA = NULL;
1738     if (!v) pcB = NULL;
1739   }
1740   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1741   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1742   nztot = nzA + nzB;
1743 
1744   cmap = mat->garray;
1745   if (v || idx) {
1746     if (nztot) {
1747       /* Sort by increasing column numbers, assuming A and B already sorted */
1748       PetscInt imark = -1;
1749       if (v) {
1750         *v = v_p = mat->rowvalues;
1751         for (i = 0; i < nzB; i++) {
1752           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1753           else break;
1754         }
1755         imark = i;
1756         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1757         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1758       }
1759       if (idx) {
1760         *idx = idx_p = mat->rowindices;
1761         if (imark > -1) {
1762           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
1763         } else {
1764           for (i = 0; i < nzB; i++) {
1765             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1766             else break;
1767           }
1768           imark = i;
1769         }
1770         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
1771         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
1772       }
1773     } else {
1774       if (idx) *idx = NULL;
1775       if (v) *v = NULL;
1776     }
1777   }
1778   *nz = nztot;
1779   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1780   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1781   PetscFunctionReturn(PETSC_SUCCESS);
1782 }
1783 
1784 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1785 {
1786   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1787 
1788   PetscFunctionBegin;
1789   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1790   aij->getrowactive = PETSC_FALSE;
1791   PetscFunctionReturn(PETSC_SUCCESS);
1792 }
1793 
1794 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
1795 {
1796   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
1797   Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
1798   PetscInt         i, j, cstart = mat->cmap->rstart;
1799   PetscReal        sum = 0.0;
1800   const MatScalar *v, *amata, *bmata;
1801 
1802   PetscFunctionBegin;
1803   if (aij->size == 1) {
1804     PetscCall(MatNorm(aij->A, type, norm));
1805   } else {
1806     PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
1807     PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
1808     if (type == NORM_FROBENIUS) {
1809       v = amata;
1810       for (i = 0; i < amat->nz; i++) {
1811         sum += PetscRealPart(PetscConj(*v) * (*v));
1812         v++;
1813       }
1814       v = bmata;
1815       for (i = 0; i < bmat->nz; i++) {
1816         sum += PetscRealPart(PetscConj(*v) * (*v));
1817         v++;
1818       }
1819       PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1820       *norm = PetscSqrtReal(*norm);
1821       PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
1822     } else if (type == NORM_1) { /* max column norm */
1823       PetscReal *tmp, *tmp2;
1824       PetscInt  *jj, *garray = aij->garray;
1825       PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
1826       PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
1827       *norm = 0.0;
1828       v     = amata;
1829       jj    = amat->j;
1830       for (j = 0; j < amat->nz; j++) {
1831         tmp[cstart + *jj++] += PetscAbsScalar(*v);
1832         v++;
1833       }
1834       v  = bmata;
1835       jj = bmat->j;
1836       for (j = 0; j < bmat->nz; j++) {
1837         tmp[garray[*jj++]] += PetscAbsScalar(*v);
1838         v++;
1839       }
1840       PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1841       for (j = 0; j < mat->cmap->N; j++) {
1842         if (tmp2[j] > *norm) *norm = tmp2[j];
1843       }
1844       PetscCall(PetscFree(tmp));
1845       PetscCall(PetscFree(tmp2));
1846       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1847     } else if (type == NORM_INFINITY) { /* max row norm */
1848       PetscReal ntemp = 0.0;
1849       for (j = 0; j < aij->A->rmap->n; j++) {
1850         v   = PetscSafePointerPlusOffset(amata, amat->i[j]);
1851         sum = 0.0;
1852         for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
1853           sum += PetscAbsScalar(*v);
1854           v++;
1855         }
1856         v = PetscSafePointerPlusOffset(bmata, bmat->i[j]);
1857         for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
1858           sum += PetscAbsScalar(*v);
1859           v++;
1860         }
1861         if (sum > ntemp) ntemp = sum;
1862       }
1863       PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
1864       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1865     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
1866     PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
1867     PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
1868   }
1869   PetscFunctionReturn(PETSC_SUCCESS);
1870 }
1871 
1872 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
1873 {
1874   Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
1875   Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
1876   PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
1877   const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
1878   Mat              B, A_diag, *B_diag;
1879   const MatScalar *pbv, *bv;
1880 
1881   PetscFunctionBegin;
1882   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1883   ma = A->rmap->n;
1884   na = A->cmap->n;
1885   mb = a->B->rmap->n;
1886   nb = a->B->cmap->n;
1887   ai = Aloc->i;
1888   aj = Aloc->j;
1889   bi = Bloc->i;
1890   bj = Bloc->j;
1891   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1892     PetscInt            *d_nnz, *g_nnz, *o_nnz;
1893     PetscSFNode         *oloc;
1894     PETSC_UNUSED PetscSF sf;
1895 
1896     PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
1897     /* compute d_nnz for preallocation */
1898     PetscCall(PetscArrayzero(d_nnz, na));
1899     for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
1900     /* compute local off-diagonal contributions */
1901     PetscCall(PetscArrayzero(g_nnz, nb));
1902     for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
1903     /* map those to global */
1904     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1905     PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
1906     PetscCall(PetscSFSetFromOptions(sf));
1907     PetscCall(PetscArrayzero(o_nnz, na));
1908     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1909     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1910     PetscCall(PetscSFDestroy(&sf));
1911 
1912     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1913     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1914     PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
1915     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1916     PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
1917     PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
1918   } else {
1919     B = *matout;
1920     PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
1921   }
1922 
1923   b           = (Mat_MPIAIJ *)B->data;
1924   A_diag      = a->A;
1925   B_diag      = &b->A;
1926   sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
1927   A_diag_ncol = A_diag->cmap->N;
1928   B_diag_ilen = sub_B_diag->ilen;
1929   B_diag_i    = sub_B_diag->i;
1930 
1931   /* Set ilen for diagonal of B */
1932   for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];
1933 
1934   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1935   very quickly (=without using MatSetValues), because all writes are local. */
1936   PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
1937   PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));
1938 
1939   /* copy over the B part */
1940   PetscCall(PetscMalloc1(bi[mb], &cols));
1941   PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
1942   pbv = bv;
1943   row = A->rmap->rstart;
1944   for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
1945   cols_tmp = cols;
1946   for (i = 0; i < mb; i++) {
1947     ncol = bi[i + 1] - bi[i];
1948     PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
1949     row++;
1950     if (pbv) pbv += ncol;
1951     if (cols_tmp) cols_tmp += ncol;
1952   }
1953   PetscCall(PetscFree(cols));
1954   PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));
1955 
1956   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1957   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1958   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1959     *matout = B;
1960   } else {
1961     PetscCall(MatHeaderMerge(A, &B));
1962   }
1963   PetscFunctionReturn(PETSC_SUCCESS);
1964 }
1965 
1966 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
1967 {
1968   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1969   Mat         a = aij->A, b = aij->B;
1970   PetscInt    s1, s2, s3;
1971 
1972   PetscFunctionBegin;
1973   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1974   if (rr) {
1975     PetscCall(VecGetLocalSize(rr, &s1));
1976     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1977     /* Overlap communication with computation. */
1978     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1979   }
1980   if (ll) {
1981     PetscCall(VecGetLocalSize(ll, &s1));
1982     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1983     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1984   }
1985   /* scale  the diagonal block */
1986   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1987 
1988   if (rr) {
1989     /* Do a scatter end and then right scale the off-diagonal block */
1990     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1991     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
1992   }
1993   PetscFunctionReturn(PETSC_SUCCESS);
1994 }
1995 
1996 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1997 {
1998   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1999 
2000   PetscFunctionBegin;
2001   PetscCall(MatSetUnfactored(a->A));
2002   PetscFunctionReturn(PETSC_SUCCESS);
2003 }
2004 
2005 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2006 {
2007   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2008   Mat         a, b, c, d;
2009   PetscBool   flg;
2010 
2011   PetscFunctionBegin;
2012   a = matA->A;
2013   b = matA->B;
2014   c = matB->A;
2015   d = matB->B;
2016 
2017   PetscCall(MatEqual(a, c, &flg));
2018   if (flg) PetscCall(MatEqual(b, d, &flg));
2019   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2020   PetscFunctionReturn(PETSC_SUCCESS);
2021 }
2022 
2023 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2024 {
2025   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2026   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2027 
2028   PetscFunctionBegin;
2029   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2030   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2031     /* because of the column compression in the off-processor part of the matrix a->B,
2032        the number of columns in a->B and b->B may be different, hence we cannot call
2033        the MatCopy() directly on the two parts. If need be, we can provide a more
2034        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2035        then copying the submatrices */
2036     PetscCall(MatCopy_Basic(A, B, str));
2037   } else {
2038     PetscCall(MatCopy(a->A, b->A, str));
2039     PetscCall(MatCopy(a->B, b->B, str));
2040   }
2041   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2042   PetscFunctionReturn(PETSC_SUCCESS);
2043 }
2044 
2045 /*
2046    Computes the number of nonzeros per row needed for preallocation when X and Y
2047    have different nonzero structure.
2048 */
2049 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2050 {
2051   PetscInt i, j, k, nzx, nzy;
2052 
2053   PetscFunctionBegin;
2054   /* Set the number of nonzeros in the new matrix */
2055   for (i = 0; i < m; i++) {
2056     const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]);
2057     nzx    = xi[i + 1] - xi[i];
2058     nzy    = yi[i + 1] - yi[i];
2059     nnz[i] = 0;
2060     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2061       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2062       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2063       nnz[i]++;
2064     }
2065     for (; k < nzy; k++) nnz[i]++;
2066   }
2067   PetscFunctionReturn(PETSC_SUCCESS);
2068 }
2069 
2070 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2071 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2072 {
2073   PetscInt    m = Y->rmap->N;
2074   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2075   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2076 
2077   PetscFunctionBegin;
2078   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2079   PetscFunctionReturn(PETSC_SUCCESS);
2080 }
2081 
2082 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2083 {
2084   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;
2085 
2086   PetscFunctionBegin;
2087   if (str == SAME_NONZERO_PATTERN) {
2088     PetscCall(MatAXPY(yy->A, a, xx->A, str));
2089     PetscCall(MatAXPY(yy->B, a, xx->B, str));
2090   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2091     PetscCall(MatAXPY_Basic(Y, a, X, str));
2092   } else {
2093     Mat       B;
2094     PetscInt *nnz_d, *nnz_o;
2095 
2096     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
2097     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
2098     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2099     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2100     PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
2101     PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
2102     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
2103     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
2104     PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
2105     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2106     PetscCall(MatHeaderMerge(Y, &B));
2107     PetscCall(PetscFree(nnz_d));
2108     PetscCall(PetscFree(nnz_o));
2109   }
2110   PetscFunctionReturn(PETSC_SUCCESS);
2111 }
2112 
2113 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2114 
2115 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2116 {
2117   PetscFunctionBegin;
2118   if (PetscDefined(USE_COMPLEX)) {
2119     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2120 
2121     PetscCall(MatConjugate_SeqAIJ(aij->A));
2122     PetscCall(MatConjugate_SeqAIJ(aij->B));
2123   }
2124   PetscFunctionReturn(PETSC_SUCCESS);
2125 }
2126 
2127 static PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2128 {
2129   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2130 
2131   PetscFunctionBegin;
2132   PetscCall(MatRealPart(a->A));
2133   PetscCall(MatRealPart(a->B));
2134   PetscFunctionReturn(PETSC_SUCCESS);
2135 }
2136 
2137 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2138 {
2139   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2140 
2141   PetscFunctionBegin;
2142   PetscCall(MatImaginaryPart(a->A));
2143   PetscCall(MatImaginaryPart(a->B));
2144   PetscFunctionReturn(PETSC_SUCCESS);
2145 }
2146 
2147 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2148 {
2149   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2150   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2151   PetscScalar       *va, *vv;
2152   Vec                vB, vA;
2153   const PetscScalar *vb;
2154 
2155   PetscFunctionBegin;
2156   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2157   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2158 
2159   PetscCall(VecGetArrayWrite(vA, &va));
2160   if (idx) {
2161     for (i = 0; i < m; i++) {
2162       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2163     }
2164   }
2165 
2166   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2167   PetscCall(PetscMalloc1(m, &idxb));
2168   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2169 
2170   PetscCall(VecGetArrayWrite(v, &vv));
2171   PetscCall(VecGetArrayRead(vB, &vb));
2172   for (i = 0; i < m; i++) {
2173     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2174       vv[i] = vb[i];
2175       if (idx) idx[i] = a->garray[idxb[i]];
2176     } else {
2177       vv[i] = va[i];
2178       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2179     }
2180   }
2181   PetscCall(VecRestoreArrayWrite(vA, &vv));
2182   PetscCall(VecRestoreArrayWrite(vA, &va));
2183   PetscCall(VecRestoreArrayRead(vB, &vb));
2184   PetscCall(PetscFree(idxb));
2185   PetscCall(VecDestroy(&vA));
2186   PetscCall(VecDestroy(&vB));
2187   PetscFunctionReturn(PETSC_SUCCESS);
2188 }
2189 
2190 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v)
2191 {
2192   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2193   Vec         vB, vA;
2194 
2195   PetscFunctionBegin;
2196   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2197   PetscCall(MatGetRowSumAbs(a->A, vA));
2198   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2199   PetscCall(MatGetRowSumAbs(a->B, vB));
2200   PetscCall(VecAXPY(vA, 1.0, vB));
2201   PetscCall(VecDestroy(&vB));
2202   PetscCall(VecCopy(vA, v));
2203   PetscCall(VecDestroy(&vA));
2204   PetscFunctionReturn(PETSC_SUCCESS);
2205 }
2206 
/*
  MatGetRowMinAbs_MPIAIJ - For each locally owned row of A, finds the entry of smallest absolute value,
  counting locations that are not stored as 0.0, and, when idx is non-NULL, the global column of that entry.

  Input Parameter:
. A - the MATMPIAIJ matrix

  Output Parameters:
+ v   - receives one value per local row
- idx - optional (may be NULL); receives one global column index per local row, or -1 for every row
        when this rank owns no columns

  Notes:
  The diagonal block (mat->A) is reduced with MatGetRowMinAbs(). The off-diagonal block (mat->B) is
  scanned here, with its column numbers translated to global ones through mat->garray.

  When both blocks give the same absolute value, the smaller global column index is reported.
*/
2207 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2208 {
2209   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2210   PetscInt           m = A->rmap->n, n = A->cmap->n;
2211   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2212   PetscInt          *cmap = mat->garray;
2213   PetscInt          *diagIdx, *offdiagIdx;
2214   Vec                diagV, offdiagV;
2215   PetscScalar       *a, *diagA, *offdiagA;
2216   const PetscScalar *ba, *bav;
2217   PetscInt           r, j, col, ncols, *bi, *bj;
2218   Mat                B = mat->B;
2219   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2220 
2221   PetscFunctionBegin;
2222   /* When a process holds entire A and other processes have no entry */
2223   if (A->cmap->N == n) {
       /* Wrap v's array in a sequential vector so the diagonal block writes its result directly into v */
2224     PetscCall(VecGetArrayWrite(v, &diagA));
2225     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2226     PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
2227     PetscCall(VecDestroy(&diagV));
2228     PetscCall(VecRestoreArrayWrite(v, &diagA));
2229     PetscFunctionReturn(PETSC_SUCCESS);
2230   } else if (n == 0) {
       /* This rank owns no columns: report 0.0 with no column (-1) for every local row */
2231     if (m) {
2232       PetscCall(VecGetArrayWrite(v, &a));
2233       for (r = 0; r < m; r++) {
2234         a[r] = 0.0;
2235         if (idx) idx[r] = -1;
2236       }
2237       PetscCall(VecRestoreArrayWrite(v, &a));
2238     }
2239     PetscFunctionReturn(PETSC_SUCCESS);
2240   }
2241 
     /* General case: reduce the diagonal block into diagV/diagIdx, then scan the off-diagonal block by hand */
2242   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2243   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2244   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2245   PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));
2246 
2247   /* Get offdiagIdx[] for implicit 0.0 */
2248   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
     /* ba and bj are cursors over B's stored values and column indices; the scan at the end of each row
        iteration advances both by one per stored entry, so they always point at the current row's start */
2249   ba = bav;
2250   bi = b->i;
2251   bj = b->j;
2252   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2253   for (r = 0; r < m; r++) {
2254     ncols = bi[r + 1] - bi[r];
2255     if (ncols == A->cmap->N - n) { /* Brow is dense */
2256       offdiagA[r]   = *ba;
2257       offdiagIdx[r] = cmap[0];
2258     } else { /* Brow is sparse, so it has an implicit 0.0 and the minimum absolute value is 0.0 */
2259       offdiagA[r] = 0.0;
2260 
2261       /* Find first hole in the cmap */
2262       for (j = 0; j < ncols; j++) {
2263         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
             /* Positions at or beyond cstart are shifted by n to step over the locally owned column range */
2264         if (col > j && j < cstart) {
2265           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2266           break;
2267         } else if (col > j + n && j >= cstart) {
2268           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2269           break;
2270         }
2271       }
2272       if (j == ncols && ncols < A->cmap->N - n) {
2273         /* a hole is outside compressed Bcols */
2274         if (ncols == 0) {
2275           if (cstart) {
2276             offdiagIdx[r] = 0;
2277           } else offdiagIdx[r] = cend;
2278         } else { /* ncols > 0 */
2279           offdiagIdx[r] = cmap[ncols - 1] + 1;
2280           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2281         }
2282       }
2283     }
2284 
       /* Scan the stored entries of this row, keeping the one of smallest absolute value seen so far */
2285     for (j = 0; j < ncols; j++) {
2286       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
2287         offdiagA[r]   = *ba;
2288         offdiagIdx[r] = cmap[*bj];
2289       }
2290       ba++;
2291       bj++;
2292     }
2293   }
2294 
     /* Merge the two per-block results row by row; on a tie the smaller global column index wins */
2295   PetscCall(VecGetArrayWrite(v, &a));
2296   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2297   for (r = 0; r < m; ++r) {
2298     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2299       a[r] = diagA[r];
2300       if (idx) idx[r] = cstart + diagIdx[r];
2301     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2302       a[r] = diagA[r];
2303       if (idx) {
2304         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2305           idx[r] = cstart + diagIdx[r];
2306         } else idx[r] = offdiagIdx[r];
2307       }
2308     } else {
2309       a[r] = offdiagA[r];
2310       if (idx) idx[r] = offdiagIdx[r];
2311     }
2312   }
2313   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2314   PetscCall(VecRestoreArrayWrite(v, &a));
2315   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2316   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2317   PetscCall(VecDestroy(&diagV));
2318   PetscCall(VecDestroy(&offdiagV));
2319   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2320   PetscFunctionReturn(PETSC_SUCCESS);
2321 }
2322 
/*
  MatGetRowMin_MPIAIJ - For each locally owned row of A, finds the entry with the smallest real part,
  counting locations that are not stored as 0.0, and, when idx is non-NULL, the global column of that entry.

  Input Parameter:
. A - the MATMPIAIJ matrix

  Output Parameters:
+ v   - receives one value per local row; PETSC_MAX_REAL for every row when this rank owns no columns
- idx - optional (may be NULL); receives one global column index per local row, or -1 for every row
        when this rank owns no columns

  Notes:
  The diagonal block (mat->A) is reduced with MatGetRowMin(). The off-diagonal block (mat->B) is
  scanned here, with its column numbers translated to global ones through mat->garray.

  Comparisons use PetscRealPart(). When both blocks give the same value, the smaller global column
  index is reported.
*/
2323 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2324 {
2325   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2326   PetscInt           m = A->rmap->n, n = A->cmap->n;
2327   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2328   PetscInt          *cmap = mat->garray;
2329   PetscInt          *diagIdx, *offdiagIdx;
2330   Vec                diagV, offdiagV;
2331   PetscScalar       *a, *diagA, *offdiagA;
2332   const PetscScalar *ba, *bav;
2333   PetscInt           r, j, col, ncols, *bi, *bj;
2334   Mat                B = mat->B;
2335   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2336 
2337   PetscFunctionBegin;
2338   /* When a process holds entire A and other processes have no entry */
2339   if (A->cmap->N == n) {
       /* Wrap v's array in a sequential vector so the diagonal block writes its result directly into v */
2340     PetscCall(VecGetArrayWrite(v, &diagA));
2341     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2342     PetscCall(MatGetRowMin(mat->A, diagV, idx));
2343     PetscCall(VecDestroy(&diagV));
2344     PetscCall(VecRestoreArrayWrite(v, &diagA));
2345     PetscFunctionReturn(PETSC_SUCCESS);
2346   } else if (n == 0) {
       /* This rank owns no columns: report PETSC_MAX_REAL with no column (-1) for every local row */
2347     if (m) {
2348       PetscCall(VecGetArrayWrite(v, &a));
2349       for (r = 0; r < m; r++) {
2350         a[r] = PETSC_MAX_REAL;
2351         if (idx) idx[r] = -1;
2352       }
2353       PetscCall(VecRestoreArrayWrite(v, &a));
2354     }
2355     PetscFunctionReturn(PETSC_SUCCESS);
2356   }
2357 
     /* General case: reduce the diagonal block into diagV/diagIdx, then scan the off-diagonal block by hand */
2358   PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
2359   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2360   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2361   PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));
2362 
2363   /* Get offdiagIdx[] for implicit 0.0 */
2364   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
     /* ba and bj are cursors over B's stored values and column indices; the scan at the end of each row
        iteration advances both by one per stored entry, so they always point at the current row's start */
2365   ba = bav;
2366   bi = b->i;
2367   bj = b->j;
2368   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2369   for (r = 0; r < m; r++) {
2370     ncols = bi[r + 1] - bi[r];
2371     if (ncols == A->cmap->N - n) { /* Brow is dense */
2372       offdiagA[r]   = *ba;
2373       offdiagIdx[r] = cmap[0];
2374     } else { /* Brow is sparse, so it has an implicit 0.0 and the minimum is 0.0 or lower */
2375       offdiagA[r] = 0.0;
2376 
2377       /* Find first hole in the cmap */
2378       for (j = 0; j < ncols; j++) {
2379         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
             /* Positions at or beyond cstart are shifted by n to step over the locally owned column range */
2380         if (col > j && j < cstart) {
2381           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2382           break;
2383         } else if (col > j + n && j >= cstart) {
2384           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2385           break;
2386         }
2387       }
2388       if (j == ncols && ncols < A->cmap->N - n) {
2389         /* a hole is outside compressed Bcols */
2390         if (ncols == 0) {
2391           if (cstart) {
2392             offdiagIdx[r] = 0;
2393           } else offdiagIdx[r] = cend;
2394         } else { /* ncols > 0 */
2395           offdiagIdx[r] = cmap[ncols - 1] + 1;
2396           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2397         }
2398       }
2399     }
2400 
       /* Scan the stored entries of this row, keeping the one with the smallest real part seen so far */
2401     for (j = 0; j < ncols; j++) {
2402       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
2403         offdiagA[r]   = *ba;
2404         offdiagIdx[r] = cmap[*bj];
2405       }
2406       ba++;
2407       bj++;
2408     }
2409   }
2410 
     /* Merge the two per-block results row by row; on a tie the smaller global column index wins */
2411   PetscCall(VecGetArrayWrite(v, &a));
2412   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2413   for (r = 0; r < m; ++r) {
2414     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2415       a[r] = diagA[r];
2416       if (idx) idx[r] = cstart + diagIdx[r];
2417     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2418       a[r] = diagA[r];
2419       if (idx) {
2420         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2421           idx[r] = cstart + diagIdx[r];
2422         } else idx[r] = offdiagIdx[r];
2423       }
2424     } else {
2425       a[r] = offdiagA[r];
2426       if (idx) idx[r] = offdiagIdx[r];
2427     }
2428   }
2429   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2430   PetscCall(VecRestoreArrayWrite(v, &a));
2431   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2432   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2433   PetscCall(VecDestroy(&diagV));
2434   PetscCall(VecDestroy(&offdiagV));
2435   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2436   PetscFunctionReturn(PETSC_SUCCESS);
2437 }
2438 
/*
  MatGetRowMax_MPIAIJ - For each locally owned row of A, finds the entry with the largest real part,
  counting locations that are not stored as 0.0, and, when idx is non-NULL, the global column of that entry.

  Input Parameter:
. A - the MATMPIAIJ matrix

  Output Parameters:
+ v   - receives one value per local row; PETSC_MIN_REAL for every row when this rank owns no columns
- idx - optional (may be NULL); receives one global column index per local row, or -1 for every row
        when this rank owns no columns

  Notes:
  The diagonal block (mat->A) is reduced with MatGetRowMax(). The off-diagonal block (mat->B) is
  scanned here, with its column numbers translated to global ones through mat->garray.

  Comparisons use PetscRealPart(). When both blocks give the same value, the smaller global column
  index is reported.
*/
2439 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2440 {
2441   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2442   PetscInt           m = A->rmap->n, n = A->cmap->n;
2443   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2444   PetscInt          *cmap = mat->garray;
2445   PetscInt          *diagIdx, *offdiagIdx;
2446   Vec                diagV, offdiagV;
2447   PetscScalar       *a, *diagA, *offdiagA;
2448   const PetscScalar *ba, *bav;
2449   PetscInt           r, j, col, ncols, *bi, *bj;
2450   Mat                B = mat->B;
2451   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2452 
2453   PetscFunctionBegin;
2454   /* When a process holds entire A and other processes have no entry */
2455   if (A->cmap->N == n) {
       /* Wrap v's array in a sequential vector so the diagonal block writes its result directly into v */
2456     PetscCall(VecGetArrayWrite(v, &diagA));
2457     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2458     PetscCall(MatGetRowMax(mat->A, diagV, idx));
2459     PetscCall(VecDestroy(&diagV));
2460     PetscCall(VecRestoreArrayWrite(v, &diagA));
2461     PetscFunctionReturn(PETSC_SUCCESS);
2462   } else if (n == 0) {
       /* This rank owns no columns: report PETSC_MIN_REAL with no column (-1) for every local row */
2463     if (m) {
2464       PetscCall(VecGetArrayWrite(v, &a));
2465       for (r = 0; r < m; r++) {
2466         a[r] = PETSC_MIN_REAL;
2467         if (idx) idx[r] = -1;
2468       }
2469       PetscCall(VecRestoreArrayWrite(v, &a));
2470     }
2471     PetscFunctionReturn(PETSC_SUCCESS);
2472   }
2473 
     /* General case: reduce the diagonal block into diagV/diagIdx, then scan the off-diagonal block by hand */
2474   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2475   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2476   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2477   PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));
2478 
2479   /* Get offdiagIdx[] for implicit 0.0 */
2480   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
     /* ba and bj are cursors over B's stored values and column indices; the scan at the end of each row
        iteration advances both by one per stored entry, so they always point at the current row's start */
2481   ba = bav;
2482   bi = b->i;
2483   bj = b->j;
2484   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2485   for (r = 0; r < m; r++) {
2486     ncols = bi[r + 1] - bi[r];
2487     if (ncols == A->cmap->N - n) { /* Brow is dense */
2488       offdiagA[r]   = *ba;
2489       offdiagIdx[r] = cmap[0];
2490     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2491       offdiagA[r] = 0.0;
2492 
2493       /* Find first hole in the cmap */
2494       for (j = 0; j < ncols; j++) {
2495         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
             /* Positions at or beyond cstart are shifted by n to step over the locally owned column range */
2496         if (col > j && j < cstart) {
2497           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2498           break;
2499         } else if (col > j + n && j >= cstart) {
2500           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2501           break;
2502         }
2503       }
2504       if (j == ncols && ncols < A->cmap->N - n) {
2505         /* a hole is outside compressed Bcols */
2506         if (ncols == 0) {
2507           if (cstart) {
2508             offdiagIdx[r] = 0;
2509           } else offdiagIdx[r] = cend;
2510         } else { /* ncols > 0 */
2511           offdiagIdx[r] = cmap[ncols - 1] + 1;
2512           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2513         }
2514       }
2515     }
2516 
       /* Scan the stored entries of this row, keeping the one with the largest real part seen so far */
2517     for (j = 0; j < ncols; j++) {
2518       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
2519         offdiagA[r]   = *ba;
2520         offdiagIdx[r] = cmap[*bj];
2521       }
2522       ba++;
2523       bj++;
2524     }
2525   }
2526 
     /* Merge the two per-block results row by row; on a tie the smaller global column index wins */
2527   PetscCall(VecGetArrayWrite(v, &a));
2528   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2529   for (r = 0; r < m; ++r) {
2530     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2531       a[r] = diagA[r];
2532       if (idx) idx[r] = cstart + diagIdx[r];
2533     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2534       a[r] = diagA[r];
2535       if (idx) {
2536         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2537           idx[r] = cstart + diagIdx[r];
2538         } else idx[r] = offdiagIdx[r];
2539       }
2540     } else {
2541       a[r] = offdiagA[r];
2542       if (idx) idx[r] = offdiagIdx[r];
2543     }
2544   }
2545   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2546   PetscCall(VecRestoreArrayWrite(v, &a));
2547   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2548   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2549   PetscCall(VecDestroy(&diagV));
2550   PetscCall(VecDestroy(&offdiagV));
2551   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2552   PetscFunctionReturn(PETSC_SUCCESS);
2553 }
2554 
2555 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2556 {
2557   Mat *dummy;
2558 
2559   PetscFunctionBegin;
2560   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2561   *newmat = *dummy;
2562   PetscCall(PetscFree(dummy));
2563   PetscFunctionReturn(PETSC_SUCCESS);
2564 }
2565 
2566 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2567 {
2568   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2569 
2570   PetscFunctionBegin;
2571   PetscCall(MatInvertBlockDiagonal(a->A, values));
2572   A->factorerrortype = a->A->factorerrortype;
2573   PetscFunctionReturn(PETSC_SUCCESS);
2574 }
2575 
2576 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2577 {
2578   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2579 
2580   PetscFunctionBegin;
2581   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2582   PetscCall(MatSetRandom(aij->A, rctx));
2583   if (x->assembled) {
2584     PetscCall(MatSetRandom(aij->B, rctx));
2585   } else {
2586     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2587   }
2588   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2589   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2590   PetscFunctionReturn(PETSC_SUCCESS);
2591 }
2592 
2593 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2594 {
2595   PetscFunctionBegin;
2596   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2597   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2598   PetscFunctionReturn(PETSC_SUCCESS);
2599 }
2600 
2601 /*@
2602   MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2603 
2604   Not Collective
2605 
2606   Input Parameter:
2607 . A - the matrix
2608 
2609   Output Parameter:
2610 . nz - the number of nonzeros
2611 
2612   Level: advanced
2613 
2614 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2615 @*/
2616 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2617 {
2618   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2619   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2620   PetscBool   isaij;
2621 
2622   PetscFunctionBegin;
2623   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
2624   PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
2625   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2626   PetscFunctionReturn(PETSC_SUCCESS);
2627 }
2628 
2629 /*@
2630   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2631 
2632   Collective
2633 
2634   Input Parameters:
2635 + A  - the matrix
2636 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)
2637 
2638   Level: advanced
2639 
2640 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2641 @*/
2642 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
2643 {
2644   PetscFunctionBegin;
2645   PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
2646   PetscFunctionReturn(PETSC_SUCCESS);
2647 }
2648 
2649 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
2650 {
2651   PetscBool sc = PETSC_FALSE, flg;
2652 
2653   PetscFunctionBegin;
2654   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2655   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2656   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2657   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2658   PetscOptionsHeadEnd();
2659   PetscFunctionReturn(PETSC_SUCCESS);
2660 }
2661 
2662 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
2663 {
2664   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
2665   Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;
2666 
2667   PetscFunctionBegin;
2668   if (!Y->preallocated) {
2669     PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
2670   } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
2671     PetscInt nonew = aij->nonew;
2672     PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
2673     aij->nonew = nonew;
2674   }
2675   PetscCall(MatShift_Basic(Y, a));
2676   PetscFunctionReturn(PETSC_SUCCESS);
2677 }
2678 
2679 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2680 {
2681   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2682 
2683   PetscFunctionBegin;
2684   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2685   PetscCall(MatMissingDiagonal(a->A, missing, d));
2686   if (d) {
2687     PetscInt rstart;
2688     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2689     *d += rstart;
2690   }
2691   PetscFunctionReturn(PETSC_SUCCESS);
2692 }
2693 
2694 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2695 {
2696   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2697 
2698   PetscFunctionBegin;
2699   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2700   PetscFunctionReturn(PETSC_SUCCESS);
2701 }
2702 
2703 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
2704 {
2705   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2706 
2707   PetscFunctionBegin;
2708   PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2709   PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2710   PetscFunctionReturn(PETSC_SUCCESS);
2711 }
2712 
/*
  Operation table for MATMPIAIJ.

  The initializer is positional: each entry fills the next member of struct _MatOps in declaration order,
  so entries must not be reordered, inserted or removed without matching that structure. The numeric
  markers in comments give the slot index of the entry that follows them. A NULL entry leaves that slot
  without an implementation from this table.
*/
2713 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2714                                        MatGetRow_MPIAIJ,
2715                                        MatRestoreRow_MPIAIJ,
2716                                        MatMult_MPIAIJ,
2717                                        /* 4*/ MatMultAdd_MPIAIJ,
2718                                        MatMultTranspose_MPIAIJ,
2719                                        MatMultTransposeAdd_MPIAIJ,
2720                                        NULL,
2721                                        NULL,
2722                                        NULL,
2723                                        /*10*/ NULL,
2724                                        NULL,
2725                                        NULL,
2726                                        MatSOR_MPIAIJ,
2727                                        MatTranspose_MPIAIJ,
2728                                        /*15*/ MatGetInfo_MPIAIJ,
2729                                        MatEqual_MPIAIJ,
2730                                        MatGetDiagonal_MPIAIJ,
2731                                        MatDiagonalScale_MPIAIJ,
2732                                        MatNorm_MPIAIJ,
2733                                        /*20*/ MatAssemblyBegin_MPIAIJ,
2734                                        MatAssemblyEnd_MPIAIJ,
2735                                        MatSetOption_MPIAIJ,
2736                                        MatZeroEntries_MPIAIJ,
2737                                        /*24*/ MatZeroRows_MPIAIJ,
2738                                        NULL,
2739                                        NULL,
2740                                        NULL,
2741                                        NULL,
2742                                        /*29*/ MatSetUp_MPI_Hash,
2743                                        NULL,
2744                                        NULL,
2745                                        MatGetDiagonalBlock_MPIAIJ,
2746                                        NULL,
2747                                        /*34*/ MatDuplicate_MPIAIJ,
2748                                        NULL,
2749                                        NULL,
2750                                        NULL,
2751                                        NULL,
2752                                        /*39*/ MatAXPY_MPIAIJ,
2753                                        MatCreateSubMatrices_MPIAIJ,
2754                                        MatIncreaseOverlap_MPIAIJ,
2755                                        MatGetValues_MPIAIJ,
2756                                        MatCopy_MPIAIJ,
2757                                        /*44*/ MatGetRowMax_MPIAIJ,
2758                                        MatScale_MPIAIJ,
2759                                        MatShift_MPIAIJ,
2760                                        MatDiagonalSet_MPIAIJ,
2761                                        MatZeroRowsColumns_MPIAIJ,
2762                                        /*49*/ MatSetRandom_MPIAIJ,
2763                                        MatGetRowIJ_MPIAIJ,
2764                                        MatRestoreRowIJ_MPIAIJ,
2765                                        NULL,
2766                                        NULL,
2767                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2768                                        NULL,
2769                                        MatSetUnfactored_MPIAIJ,
2770                                        MatPermute_MPIAIJ,
2771                                        NULL,
2772                                        /*59*/ MatCreateSubMatrix_MPIAIJ,
2773                                        MatDestroy_MPIAIJ,
2774                                        MatView_MPIAIJ,
2775                                        NULL,
2776                                        NULL,
2777                                        /*64*/ NULL,
2778                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2779                                        NULL,
2780                                        NULL,
2781                                        NULL,
2782                                        /*69*/ MatGetRowMaxAbs_MPIAIJ,
2783                                        MatGetRowMinAbs_MPIAIJ,
2784                                        NULL,
2785                                        NULL,
2786                                        NULL,
2787                                        NULL,
2788                                        /*75*/ MatFDColoringApply_AIJ,
2789                                        MatSetFromOptions_MPIAIJ,
2790                                        NULL,
2791                                        NULL,
2792                                        MatFindZeroDiagonals_MPIAIJ,
2793                                        /*80*/ NULL,
2794                                        NULL,
2795                                        NULL,
2796                                        /*83*/ MatLoad_MPIAIJ,
2797                                        NULL,
2798                                        NULL,
2799                                        NULL,
2800                                        NULL,
2801                                        NULL,
2802                                        /*89*/ NULL,
2803                                        NULL,
2804                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2805                                        NULL,
2806                                        NULL,
2807                                        /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2808                                        NULL,
2809                                        NULL,
2810                                        NULL,
2811                                        MatBindToCPU_MPIAIJ,
2812                                        /*99*/ MatProductSetFromOptions_MPIAIJ,
2813                                        NULL,
2814                                        NULL,
2815                                        MatConjugate_MPIAIJ,
2816                                        NULL,
2817                                        /*104*/ MatSetValuesRow_MPIAIJ,
2818                                        MatRealPart_MPIAIJ,
2819                                        MatImaginaryPart_MPIAIJ,
2820                                        NULL,
2821                                        NULL,
2822                                        /*109*/ NULL,
2823                                        NULL,
2824                                        MatGetRowMin_MPIAIJ,
2825                                        NULL,
2826                                        MatMissingDiagonal_MPIAIJ,
2827                                        /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
2828                                        NULL,
2829                                        MatGetGhosts_MPIAIJ,
2830                                        NULL,
2831                                        NULL,
2832                                        /*119*/ MatMultDiagonalBlock_MPIAIJ,
2833                                        NULL,
2834                                        NULL,
2835                                        NULL,
2836                                        MatGetMultiProcBlock_MPIAIJ,
2837                                        /*124*/ MatFindNonzeroRows_MPIAIJ,
2838                                        MatGetColumnReductions_MPIAIJ,
2839                                        MatInvertBlockDiagonal_MPIAIJ,
2840                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2841                                        MatCreateSubMatricesMPI_MPIAIJ,
2842                                        /*129*/ NULL,
2843                                        NULL,
2844                                        NULL,
2845                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2846                                        NULL,
2847                                        /*134*/ NULL,
2848                                        NULL,
2849                                        NULL,
2850                                        NULL,
2851                                        NULL,
2852                                        /*139*/ MatSetBlockSizes_MPIAIJ,
2853                                        NULL,
2854                                        NULL,
2855                                        MatFDColoringSetUp_MPIXAIJ,
2856                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2857                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2858                                        /*145*/ NULL,
2859                                        NULL,
2860                                        NULL,
2861                                        MatCreateGraph_Simple_AIJ,
2862                                        NULL,
2863                                        /*150*/ NULL,
2864                                        MatEliminateZeros_MPIAIJ,
2865                                        MatGetRowSumAbs_MPIAIJ};
2866 
2867 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2868 {
2869   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2870 
2871   PetscFunctionBegin;
2872   PetscCall(MatStoreValues(aij->A));
2873   PetscCall(MatStoreValues(aij->B));
2874   PetscFunctionReturn(PETSC_SUCCESS);
2875 }
2876 
2877 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2878 {
2879   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2880 
2881   PetscFunctionBegin;
2882   PetscCall(MatRetrieveValues(aij->A));
2883   PetscCall(MatRetrieveValues(aij->B));
2884   PetscFunctionReturn(PETSC_SUCCESS);
2885 }
2886 
/*
  MatMPIAIJSetPreallocation_MPIAIJ - discards the current sequential blocks of an MPIAIJ matrix, creates
  fresh MATSEQAIJ blocks and preallocates them.

  Input Parameters:
+ B     - the parallel matrix
. d_nz  - forwarded, together with d_nnz, to MatSeqAIJSetPreallocation() for the block b->A
. d_nnz - see d_nz
. o_nz  - forwarded, together with o_nnz, to MatSeqAIJSetPreallocation() for the block b->B
- o_nnz - see o_nz

  Notes:
  The column map, garray, lvec and Mvctx of the matrix are destroyed. On return the matrix is flagged as
  preallocated and as not assembled.
*/
2887 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2888 {
2889   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2890   PetscMPIInt size;
2891 
2892   PetscFunctionBegin;
       /* put back the operations table kept in b->cops and clear the hash_active flag */
2893   if (B->hash_active) {
2894     B->ops[0]      = b->cops;
2895     B->hash_active = PETSC_FALSE;
2896   }
2897   PetscCall(PetscLayoutSetUp(B->rmap));
2898   PetscCall(PetscLayoutSetUp(B->cmap));
2899 
       /* drop the column map, garray, lvec and scatter that belong to the blocks being replaced */
2900 #if defined(PETSC_USE_CTABLE)
2901   PetscCall(PetscHMapIDestroy(&b->colmap));
2902 #else
2903   PetscCall(PetscFree(b->colmap));
2904 #endif
2905   PetscCall(PetscFree(b->garray));
2906   PetscCall(VecDestroy(&b->lvec));
2907   PetscCall(VecScatterDestroy(&b->Mvctx));
2908 
2909   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2910 
       /* NOTE(review): the Get/Restore macros bracket the re-creation of each block; presumably they carry
          options of the old block over to its replacement -- confirm against the macro definitions.
          Do not reorder the statements between a Get/Restore pair. */
       /* b->B: local rows by all global columns; it gets zero columns when the communicator has one process */
2911   MatSeqXAIJGetOptions_Private(b->B);
2912   PetscCall(MatDestroy(&b->B));
2913   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2914   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2915   PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
2916   PetscCall(MatSetType(b->B, MATSEQAIJ));
2917   MatSeqXAIJRestoreOptions_Private(b->B);
2918 
       /* b->A: local rows by local columns */
2919   MatSeqXAIJGetOptions_Private(b->A);
2920   PetscCall(MatDestroy(&b->A));
2921   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2922   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2923   PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
2924   PetscCall(MatSetType(b->A, MATSEQAIJ));
2925   MatSeqXAIJRestoreOptions_Private(b->A);
2926 
2927   PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
2928   PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
2929   B->preallocated  = PETSC_TRUE;
2930   B->was_assembled = PETSC_FALSE;
2931   B->assembled     = PETSC_FALSE;
2932   PetscFunctionReturn(PETSC_SUCCESS);
2933 }
2934 
2935 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2936 {
2937   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2938 
2939   PetscFunctionBegin;
2940   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2941   PetscCall(PetscLayoutSetUp(B->rmap));
2942   PetscCall(PetscLayoutSetUp(B->cmap));
2943 
2944 #if defined(PETSC_USE_CTABLE)
2945   PetscCall(PetscHMapIDestroy(&b->colmap));
2946 #else
2947   PetscCall(PetscFree(b->colmap));
2948 #endif
2949   PetscCall(PetscFree(b->garray));
2950   PetscCall(VecDestroy(&b->lvec));
2951   PetscCall(VecScatterDestroy(&b->Mvctx));
2952 
2953   PetscCall(MatResetPreallocation(b->A));
2954   PetscCall(MatResetPreallocation(b->B));
2955   B->preallocated  = PETSC_TRUE;
2956   B->was_assembled = PETSC_FALSE;
2957   B->assembled     = PETSC_FALSE;
2958   PetscFunctionReturn(PETSC_SUCCESS);
2959 }
2960 
2961 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2962 {
2963   Mat         mat;
2964   Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;
2965 
2966   PetscFunctionBegin;
2967   *newmat = NULL;
2968   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2969   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2970   PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
2971   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2972   a = (Mat_MPIAIJ *)mat->data;
2973 
2974   mat->factortype = matin->factortype;
2975   mat->assembled  = matin->assembled;
2976   mat->insertmode = NOT_SET_VALUES;
2977 
2978   a->size         = oldmat->size;
2979   a->rank         = oldmat->rank;
2980   a->donotstash   = oldmat->donotstash;
2981   a->roworiented  = oldmat->roworiented;
2982   a->rowindices   = NULL;
2983   a->rowvalues    = NULL;
2984   a->getrowactive = PETSC_FALSE;
2985 
2986   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
2987   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2988   if (matin->hash_active) {
2989     PetscCall(MatSetUp(mat));
2990   } else {
2991     mat->preallocated = matin->preallocated;
2992     if (oldmat->colmap) {
2993 #if defined(PETSC_USE_CTABLE)
2994       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
2995 #else
2996       PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
2997       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
2998 #endif
2999     } else a->colmap = NULL;
3000     if (oldmat->garray) {
3001       PetscInt len;
3002       len = oldmat->B->cmap->n;
3003       PetscCall(PetscMalloc1(len + 1, &a->garray));
3004       if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3005     } else a->garray = NULL;
3006 
3007     /* It may happen MatDuplicate is called with a non-assembled matrix
3008       In fact, MatDuplicate only requires the matrix to be preallocated
3009       This may happen inside a DMCreateMatrix_Shell */
3010     if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3011     if (oldmat->Mvctx) {
3012       a->Mvctx = oldmat->Mvctx;
3013       PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx));
3014     }
3015     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3016     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3017   }
3018   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3019   *newmat = mat;
3020   PetscFunctionReturn(PETSC_SUCCESS);
3021 }
3022 
3023 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3024 {
3025   PetscBool isbinary, ishdf5;
3026 
3027   PetscFunctionBegin;
3028   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3029   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3030   /* force binary viewer to load .info file if it has not yet done so */
3031   PetscCall(PetscViewerSetUp(viewer));
3032   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3033   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3034   if (isbinary) {
3035     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3036   } else if (ishdf5) {
3037 #if defined(PETSC_HAVE_HDF5)
3038     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3039 #else
3040     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3041 #endif
3042   } else {
3043     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3044   }
3045   PetscFunctionReturn(PETSC_SUCCESS);
3046 }
3047 
3048 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3049 {
3050   PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
3051   PetscInt    *rowidxs, *colidxs;
3052   PetscScalar *matvals;
3053 
3054   PetscFunctionBegin;
3055   PetscCall(PetscViewerSetUp(viewer));
3056 
3057   /* read in matrix header */
3058   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3059   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3060   M  = header[1];
3061   N  = header[2];
3062   nz = header[3];
3063   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3064   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3065   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");
3066 
3067   /* set block sizes from the viewer's .info file */
3068   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3069   /* set global sizes if not set already */
3070   if (mat->rmap->N < 0) mat->rmap->N = M;
3071   if (mat->cmap->N < 0) mat->cmap->N = N;
3072   PetscCall(PetscLayoutSetUp(mat->rmap));
3073   PetscCall(PetscLayoutSetUp(mat->cmap));
3074 
3075   /* check if the matrix sizes are correct */
3076   PetscCall(MatGetSize(mat, &rows, &cols));
3077   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3078 
3079   /* read in row lengths and build row indices */
3080   PetscCall(MatGetLocalSize(mat, &m, NULL));
3081   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3082   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3083   rowidxs[0] = 0;
3084   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3085   if (nz != PETSC_MAX_INT) {
3086     PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3087     PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3088   }
3089 
3090   /* read in column indices and matrix values */
3091   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3092   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3093   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3094   /* store matrix indices and values */
3095   PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
3096   PetscCall(PetscFree(rowidxs));
3097   PetscCall(PetscFree2(colidxs, matvals));
3098   PetscFunctionReturn(PETSC_SUCCESS);
3099 }
3100 
3101 /* Not scalable because of ISAllGather() unless getting all columns. */
3102 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3103 {
3104   IS          iscol_local;
3105   PetscBool   isstride;
3106   PetscMPIInt lisstride = 0, gisstride;
3107 
3108   PetscFunctionBegin;
3109   /* check if we are grabbing all columns*/
3110   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3111 
3112   if (isstride) {
3113     PetscInt start, len, mstart, mlen;
3114     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3115     PetscCall(ISGetLocalSize(iscol, &len));
3116     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3117     if (mstart == start && mlen - mstart == len) lisstride = 1;
3118   }
3119 
3120   PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3121   if (gisstride) {
3122     PetscInt N;
3123     PetscCall(MatGetSize(mat, NULL, &N));
3124     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3125     PetscCall(ISSetIdentity(iscol_local));
3126     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3127   } else {
3128     PetscInt cbs;
3129     PetscCall(ISGetBlockSize(iscol, &cbs));
3130     PetscCall(ISAllGather(iscol, &iscol_local));
3131     PetscCall(ISSetBlockSize(iscol_local, cbs));
3132   }
3133 
3134   *isseq = iscol_local;
3135   PetscFunctionReturn(PETSC_SUCCESS);
3136 }
3137 
3138 /*
3139  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3140  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3141 
3142  Input Parameters:
3143 +   mat - matrix
3144 .   isrow - parallel row index set; its local indices are a subset of local rows of `mat`,
3145            i.e., mat->rstart <= isrow[i] < mat->rend
3146 -   iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
3147            i.e., mat->cstart <= iscol[i] < mat->cend
3148 
3149  Output Parameters:
3150 +   isrow_d - sequential row index set for retrieving mat->A
3151 .   iscol_d - sequential  column index set for retrieving mat->A
3152 .   iscol_o - sequential column index set for retrieving mat->B
3153 -   garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`;
                the array is allocated here with PetscMalloc1() and is not freed by this routine

      Notes:
      The selected columns are communicated with the scatter a->Mvctx of `mat`. Two vectors with the column
      layout of `mat` are filled with -1 and then, for every local entry of `iscol`, with the global column
      index (vector x) and with the position of that entry in `iscol` (vector cmap). The integer values are
      stored as PetscScalar. After the scatter, every column of mat->B whose x value is still -1 is not
      part of `iscol`.
3154  */
3155 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
3156 {
3157   Vec             x, cmap;
3158   const PetscInt *is_idx;
3159   PetscScalar    *xarray, *cmaparray;
3160   PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
3161   Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
3162   Mat             B    = a->B;
3163   Vec             lvec = a->lvec, lcmap;
3164   PetscInt        i, cstart, cend, Bn = B->cmap->N;
3165   MPI_Comm        comm;
3166   VecScatter      Mvctx = a->Mvctx;
3167 
3168   PetscFunctionBegin;
3169   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3170   PetscCall(ISGetLocalSize(iscol, &ncols));
3171 
3172   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3173   PetscCall(MatCreateVecs(mat, &x, NULL));
3174   PetscCall(VecSet(x, -1.0));
3175   PetscCall(VecDuplicate(x, &cmap));
3176   PetscCall(VecSet(cmap, -1.0));
3177 
3178   /* Get start indices */
       /* MPI_Scan() is an inclusive prefix sum; subtracting the own count gives the position of the
          first local entry of iscol within the concatenation of all local parts */
3179   PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
3180   isstart -= ncols;
3181   PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));
3182 
3183   PetscCall(ISGetIndices(iscol, &is_idx));
3184   PetscCall(VecGetArray(x, &xarray));
3185   PetscCall(VecGetArray(cmap, &cmaparray));
3186   PetscCall(PetscMalloc1(ncols, &idx));
3187   for (i = 0; i < ncols; i++) {
3188     xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
3189     cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
3190     idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
3191   }
3192   PetscCall(VecRestoreArray(x, &xarray));
3193   PetscCall(VecRestoreArray(cmap, &cmaparray));
3194   PetscCall(ISRestoreIndices(iscol, &is_idx));
3195 
3196   /* Get iscol_d */
       /* idx is passed with PETSC_OWN_POINTER and is not freed here; i is reused as scratch for the block size */
3197   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
3198   PetscCall(ISGetBlockSize(iscol, &i));
3199   PetscCall(ISSetBlockSize(*iscol_d, i));
3200 
3201   /* Get isrow_d */
       /* shift the row indices by the first owned row of mat; a new idx array is allocated for them */
3202   PetscCall(ISGetLocalSize(isrow, &m));
3203   rstart = mat->rmap->rstart;
3204   PetscCall(PetscMalloc1(m, &idx));
3205   PetscCall(ISGetIndices(isrow, &is_idx));
3206   for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
3207   PetscCall(ISRestoreIndices(isrow, &is_idx));
3208 
3209   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
3210   PetscCall(ISGetBlockSize(isrow, &i));
3211   PetscCall(ISSetBlockSize(*isrow_d, i));
3212 
3213   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
       /* x is scattered into the matrix's own vector a->lvec; cmap is scattered into a duplicate of it */
3214   PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3215   PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3216 
3217   PetscCall(VecDuplicate(lvec, &lcmap));
3218 
3219   PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3220   PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3221 
3222   /* (3) create sequential iscol_o (a subset of iscol) and garray */
3223   /* off-process column indices */
3224   count = 0;
3225   PetscCall(PetscMalloc1(Bn, &idx));
3226   PetscCall(PetscMalloc1(Bn, &cmap1));
3227 
3228   PetscCall(VecGetArray(lvec, &xarray));
3229   PetscCall(VecGetArray(lcmap, &cmaparray));
       /* keep the columns of B that received a column index (> -1) from the process owning them */
3230   for (i = 0; i < Bn; i++) {
3231     if (PetscRealPart(xarray[i]) > -1.0) {
3232       idx[count]   = i;                                     /* local column index in off-diagonal part B */
3233       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
3234       count++;
3235     }
3236   }
3237   PetscCall(VecRestoreArray(lvec, &xarray));
3238   PetscCall(VecRestoreArray(lcmap, &cmaparray));
3239 
       /* the first count entries of idx are copied into iscol_o, so idx is freed below */
3240   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
3241   /* cannot ensure iscol_o has same blocksize as iscol! */
3242 
3243   PetscCall(PetscFree(idx));
       /* cmap1 has room for Bn entries, of which the first count are set; it is returned, not freed */
3244   *garray = cmap1;
3245 
3246   PetscCall(VecDestroy(&x));
3247   PetscCall(VecDestroy(&cmap));
3248   PetscCall(VecDestroy(&lcmap));
3249   PetscFunctionReturn(PETSC_SUCCESS);
3250 }
3251 
3252 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat
     
        With MAT_INITIAL_MATRIX the sequential index sets isrow_d, iscol_d and iscol_o are created, the
        submatrices of mat's blocks A and B are extracted with them and combined into *submat, and the
        index sets are composed with *submat under the names "isrow_d", "iscol_d" and "iscol_o".
        With MAT_REUSE_MATRIX those composed index sets are queried from *submat and used to refresh
        the two blocks of *submat; an error is raised if one of them is missing. */
3253 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
3254 {
3255   Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
3256   Mat         M = NULL;
3257   MPI_Comm    comm;
3258   IS          iscol_d, isrow_d, iscol_o;
3259   Mat         Asub = NULL, Bsub = NULL;
3260   PetscInt    n;
3261 
3262   PetscFunctionBegin;
3263   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3264 
3265   if (call == MAT_REUSE_MATRIX) {
3266     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3267     PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
3268     PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");
3269 
3270     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
3271     PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");
3272 
3273     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
3274     PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");
3275 
3276     /* Update diagonal and off-diagonal portions of submat */
3277     asub = (Mat_MPIAIJ *)(*submat)->data;
3278     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
         /* the off-diagonal block is refreshed only if iscol_o is not empty */
3279     PetscCall(ISGetLocalSize(iscol_o, &n));
3280     if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
3281     PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
3282     PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));
3283 
3284   } else { /* call == MAT_INITIAL_MATRIX) */
3285     const PetscInt *garray;
3286     PetscInt        BsubN;
3287 
3288     /* Create isrow_d, iscol_d, iscol_o and garray */
3289     PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));
3290 
3291     /* Create local submatrices Asub and Bsub */
3292     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
3293     PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));
3294 
3295     /* Create submatrix M */
         /* Asub becomes the diagonal block of M and Bsub is destroyed by this call (see its manual page) */
3296     PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));
3297 
3298     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3299     asub = (Mat_MPIAIJ *)M->data;
3300 
         /* BsubN: number of columns requested through iscol_o; n: number of columns of M's off-diagonal block */
3301     PetscCall(ISGetLocalSize(iscol_o, &BsubN));
3302     n = asub->B->cmap->N;
3303     if (BsubN > n) {
3304       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3305       const PetscInt *idx;
3306       PetscInt        i, j, *idx_new, *subgarray = asub->garray;
3307       PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));
3308 
3309       PetscCall(PetscMalloc1(n, &idx_new));
3310       j = 0;
3311       PetscCall(ISGetIndices(iscol_o, &idx));
           /* merge-style walk: for every entry of subgarray advance j to the matching entry of garray and
              keep the corresponding entry of iscol_o.
              NOTE(review): the inner while loop does not bound j by BsubN; it appears to rely on every
              subgarray entry occurring in garray in the same increasing order -- confirm */
3312       for (i = 0; i < n; i++) {
3313         if (j >= BsubN) break;
3314         while (subgarray[i] > garray[j]) j++;
3315 
3316         if (subgarray[i] == garray[j]) {
3317           idx_new[i] = idx[j++];
3318         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
3319       }
3320       PetscCall(ISRestoreIndices(iscol_o, &idx));
3321 
           /* replace iscol_o by the compressed index set; idx_new is passed with PETSC_OWN_POINTER */
3322       PetscCall(ISDestroy(&iscol_o));
3323       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));
3324 
3325     } else if (BsubN < n) {
3326       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
3327     }
3328 
         /* garray was allocated by ISGetSeqIS_SameColDist_Private() and is released here */
3329     PetscCall(PetscFree(garray));
3330     *submat = M;
3331 
3332     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
         /* each index set is composed with M and the local handle is destroyed right afterwards */
3333     PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
3334     PetscCall(ISDestroy(&isrow_d));
3335 
3336     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
3337     PetscCall(ISDestroy(&iscol_d));
3338 
3339     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
3340     PetscCall(ISDestroy(&iscol_o));
3341   }
3342   PetscFunctionReturn(PETSC_SUCCESS);
3343 }
3344 
/*
  MatCreateSubMatrix_MPIAIJ - extracts a parallel submatrix, choosing one of three implementations.

  Input Parameters:
+ mat   - the matrix
. isrow - parallel row index set
. iscol - parallel column index set
- call  - MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

  Output Parameter:
. newmat - the submatrix

  Notes:
  With MAT_INITIAL_MATRIX the implementation is chosen by testing, on all processes, whether the local
  entries of isrow and of iscol lie inside the local row and column ownership ranges of mat:
  both inside selects MatCreateSubMatrix_MPIAIJ_SameRowColDist(); only the rows inside, with a sorted
  gathered column index set, selects MatCreateSubMatrix_MPIAIJ_SameRowDist(); everything else uses
  MatCreateSubMatrix_MPIAIJ_nonscalable().

  With MAT_REUSE_MATRIX the implementation is recognized from the objects composed with *newmat:
  "isrow_d", "SubIScol" or "ISAllGather".
*/
3345 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
3346 {
3347   IS        iscol_local = NULL, isrow_d;
3348   PetscInt  csize;
3349   PetscInt  n, i, j, start, end;
3350   PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
3351   MPI_Comm  comm;
3352 
3353   PetscFunctionBegin;
3354   /* If isrow has same processor distribution as mat,
3355      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3356   if (call == MAT_REUSE_MATRIX) {
         /* tsameDist[1] is assigned only on the paths that also set sameRowDist; it is read below only
            when sameRowDist is true */
3357     PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
3358     if (isrow_d) {
3359       sameRowDist  = PETSC_TRUE;
3360       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3361     } else {
3362       PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
3363       if (iscol_local) {
3364         sameRowDist  = PETSC_TRUE;
3365         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3366       }
3367     }
3368   } else {
3369     /* Check if isrow has same processor distribution as mat */
         /* an empty local part counts as matching; otherwise min and max must lie in the owned range */
3370     sameDist[0] = PETSC_FALSE;
3371     PetscCall(ISGetLocalSize(isrow, &n));
3372     if (!n) {
3373       sameDist[0] = PETSC_TRUE;
3374     } else {
3375       PetscCall(ISGetMinMax(isrow, &i, &j));
3376       PetscCall(MatGetOwnershipRange(mat, &start, &end));
3377       if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
3378     }
3379 
3380     /* Check if iscol has same processor distribution as mat */
3381     sameDist[1] = PETSC_FALSE;
3382     PetscCall(ISGetLocalSize(iscol, &n));
3383     if (!n) {
3384       sameDist[1] = PETSC_TRUE;
3385     } else {
3386       PetscCall(ISGetMinMax(iscol, &i, &j));
3387       PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
3388       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3389     }
3390 
         /* both properties must hold on every process (logical AND over the communicator) */
3391     PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3392     PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
3393     sameRowDist = tsameDist[0];
3394   }
3395 
3396   if (sameRowDist) {
3397     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3398       /* isrow and iscol have same processor distribution as mat */
3399       PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
3400       PetscFunctionReturn(PETSC_SUCCESS);
3401     } else { /* sameRowDist */
3402       /* isrow has same processor distribution as mat */
3403       if (call == MAT_INITIAL_MATRIX) {
3404         PetscBool sorted;
3405         PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3406         PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
3407         PetscCall(ISGetSize(iscol, &i));
3408         PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);
3409 
3410         PetscCall(ISSorted(iscol_local, &sorted));
3411         if (sorted) {
3412           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3413           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
3414           PetscFunctionReturn(PETSC_SUCCESS);
3415         }
             /* not sorted: fall through to the general case, which uses the iscol_local created above */
3416       } else { /* call == MAT_REUSE_MATRIX */
3417         IS iscol_sub;
3418         PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3419         if (iscol_sub) {
3420           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
3421           PetscFunctionReturn(PETSC_SUCCESS);
3422         }
3423       }
3424     }
3425   }
3426 
3427   /* General case: iscol -> iscol_local which has global size of iscol */
3428   if (call == MAT_REUSE_MATRIX) {
3429     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
3430     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3431   } else {
3432     if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3433   }
3434 
3435   PetscCall(ISGetLocalSize(iscol, &csize));
3436   PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));
3437 
       /* on first use, keep iscol_local with the result under "ISAllGather" for later reuse calls and
          destroy the local handle */
3438   if (call == MAT_INITIAL_MATRIX) {
3439     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3440     PetscCall(ISDestroy(&iscol_local));
3441   }
3442   PetscFunctionReturn(PETSC_SUCCESS);
3443 }
3444 
3445 /*@C
3446   MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3447   and "off-diagonal" part of the matrix in CSR format.
3448 
3449   Collective
3450 
3451   Input Parameters:
3452 + comm   - MPI communicator
3453 . A      - "diagonal" portion of matrix
3454 . B      - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3455 - garray - global index of `B` columns
3456 
3457   Output Parameter:
3458 . mat - the matrix, with input `A` as its local diagonal matrix
3459 
3460   Level: advanced
3461 
3462   Notes:
3463   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3464 
3465   `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore.
3466 
       The column indices stored in `B` are used to index `garray` and are overwritten in place with the
       corresponding `garray` entries. The index and value arrays of `B` are then taken over by the
       off-diagonal block of `mat`.

       The number of local columns of `mat` is the number of columns of `A`; the global number of columns is
       the sum of these over `comm`.

       On return `mat` has been assembled and the option `MAT_NEW_NONZERO_LOCATION_ERR` is set on it.

3467 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3468 @*/
3469 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
3470 {
3471   Mat_MPIAIJ        *maij;
3472   Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
3473   PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
3474   const PetscScalar *oa;
3475   Mat                Bnew;
3476   PetscInt           m, n, N;
3477   MatType            mpi_mat_type;
3478 
3479   PetscFunctionBegin;
3480   PetscCall(MatCreate(comm, mat));
       /* A and B must have the same number of rows and the same row block size */
3481   PetscCall(MatGetSize(A, &m, &n));
3482   PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
3483   PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
3484   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3485   /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */
3486 
3487   /* Get global columns of mat */
3488   PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));
3489 
3490   PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
3491   /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
3492   PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
3493   PetscCall(MatSetType(*mat, mpi_mat_type));
3494 
3495   if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
3496   maij = (Mat_MPIAIJ *)(*mat)->data;
3497 
3498   (*mat)->preallocated = PETSC_TRUE;
3499 
3500   PetscCall(PetscLayoutSetUp((*mat)->rmap));
3501   PetscCall(PetscLayoutSetUp((*mat)->cmap));
3502 
3503   /* Set A as diagonal portion of *mat */
3504   maij->A = A;
3505 
       /* oi[m] is the number of stored entries of B; replace every column index of B by its garray entry, in place */
3506   nz = oi[m];
3507   for (i = 0; i < nz; i++) {
3508     col   = oj[i];
3509     oj[i] = garray[col];
3510   }
3511 
3512   /* Set Bnew as off-diagonal portion of *mat */
       /* Bnew has N columns and is built directly on B's row-offset, column-index and value arrays (no copy is made here) */
3513   PetscCall(MatSeqAIJGetArrayRead(B, &oa));
3514   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
3515   PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
3516   bnew        = (Mat_SeqAIJ *)Bnew->data;
3517   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3518   maij->B     = Bnew;
3519 
3520   PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);
3521 
       /* clear B's ownership flags before destroying it, so that the arrays now used by Bnew stay allocated */
3522   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3523   b->free_a       = PETSC_FALSE;
3524   b->free_ij      = PETSC_FALSE;
3525   PetscCall(MatDestroy(&B));
3526 
3527   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3528   bnew->free_a       = PETSC_TRUE;
3529   bnew->free_ij      = PETSC_TRUE;
3530 
3531   /* condense columns of maij->B */
       /* MAT_NO_OFF_PROC_ENTRIES is switched on only for this assembly and switched off again afterwards */
3532   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3533   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
3534   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
3535   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
3536   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3537   PetscFunctionReturn(PETSC_SUCCESS);
3538 }
3539 
3540 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3541 
3542 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
3543 {
3544   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
3545   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3546   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
3547   Mat             M, Msub, B = a->B;
3548   MatScalar      *aa;
3549   Mat_SeqAIJ     *aij;
3550   PetscInt       *garray = a->garray, *colsub, Ncols;
3551   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
3552   IS              iscol_sub, iscmap;
3553   const PetscInt *is_idx, *cmap;
3554   PetscBool       allcolumns = PETSC_FALSE;
3555   MPI_Comm        comm;
3556 
3557   PetscFunctionBegin;
3558   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3559   if (call == MAT_REUSE_MATRIX) {
3560     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3561     PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
3562     PetscCall(ISGetLocalSize(iscol_sub, &count));
3563 
3564     PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
3565     PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");
3566 
3567     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
3568     PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3569 
3570     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
3571 
3572   } else { /* call == MAT_INITIAL_MATRIX) */
3573     PetscBool flg;
3574 
3575     PetscCall(ISGetLocalSize(iscol, &n));
3576     PetscCall(ISGetSize(iscol, &Ncols));
3577 
3578     /* (1) iscol -> nonscalable iscol_local */
3579     /* Check for special case: each processor gets entire matrix columns */
3580     PetscCall(ISIdentity(iscol_local, &flg));
3581     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3582     PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3583     if (allcolumns) {
3584       iscol_sub = iscol_local;
3585       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3586       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
3587 
3588     } else {
3589       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3590       PetscInt *idx, *cmap1, k;
3591       PetscCall(PetscMalloc1(Ncols, &idx));
3592       PetscCall(PetscMalloc1(Ncols, &cmap1));
3593       PetscCall(ISGetIndices(iscol_local, &is_idx));
3594       count = 0;
3595       k     = 0;
3596       for (i = 0; i < Ncols; i++) {
3597         j = is_idx[i];
3598         if (j >= cstart && j < cend) {
3599           /* diagonal part of mat */
3600           idx[count]     = j;
3601           cmap1[count++] = i; /* column index in submat */
3602         } else if (Bn) {
3603           /* off-diagonal part of mat */
3604           if (j == garray[k]) {
3605             idx[count]     = j;
3606             cmap1[count++] = i; /* column index in submat */
3607           } else if (j > garray[k]) {
3608             while (j > garray[k] && k < Bn - 1) k++;
3609             if (j == garray[k]) {
3610               idx[count]     = j;
3611               cmap1[count++] = i; /* column index in submat */
3612             }
3613           }
3614         }
3615       }
3616       PetscCall(ISRestoreIndices(iscol_local, &is_idx));
3617 
3618       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
3619       PetscCall(ISGetBlockSize(iscol, &cbs));
3620       PetscCall(ISSetBlockSize(iscol_sub, cbs));
3621 
3622       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
3623     }
3624 
3625     /* (3) Create sequential Msub */
3626     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
3627   }
3628 
3629   PetscCall(ISGetLocalSize(iscol_sub, &count));
3630   aij = (Mat_SeqAIJ *)(Msub)->data;
3631   ii  = aij->i;
3632   PetscCall(ISGetIndices(iscmap, &cmap));
3633 
3634   /*
3635       m - number of local rows
3636       Ncols - number of columns (same on all processors)
3637       rstart - first row in new global matrix generated
3638   */
3639   PetscCall(MatGetSize(Msub, &m, NULL));
3640 
3641   if (call == MAT_INITIAL_MATRIX) {
3642     /* (4) Create parallel newmat */
3643     PetscMPIInt rank, size;
3644     PetscInt    csize;
3645 
3646     PetscCallMPI(MPI_Comm_size(comm, &size));
3647     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3648 
3649     /*
3650         Determine the number of non-zeros in the diagonal and off-diagonal
3651         portions of the matrix in order to do correct preallocation
3652     */
3653 
3654     /* first get start and end of "diagonal" columns */
3655     PetscCall(ISGetLocalSize(iscol, &csize));
3656     if (csize == PETSC_DECIDE) {
3657       PetscCall(ISGetSize(isrow, &mglobal));
3658       if (mglobal == Ncols) { /* square matrix */
3659         nlocal = m;
3660       } else {
3661         nlocal = Ncols / size + ((Ncols % size) > rank);
3662       }
3663     } else {
3664       nlocal = csize;
3665     }
3666     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3667     rstart = rend - nlocal;
3668     PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);
3669 
3670     /* next, compute all the lengths */
3671     jj = aij->j;
3672     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3673     olens = dlens + m;
3674     for (i = 0; i < m; i++) {
3675       jend = ii[i + 1] - ii[i];
3676       olen = 0;
3677       dlen = 0;
3678       for (j = 0; j < jend; j++) {
3679         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3680         else dlen++;
3681         jj++;
3682       }
3683       olens[i] = olen;
3684       dlens[i] = dlen;
3685     }
3686 
3687     PetscCall(ISGetBlockSize(isrow, &bs));
3688     PetscCall(ISGetBlockSize(iscol, &cbs));
3689 
3690     PetscCall(MatCreate(comm, &M));
3691     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
3692     PetscCall(MatSetBlockSizes(M, bs, cbs));
3693     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3694     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3695     PetscCall(PetscFree(dlens));
3696 
3697   } else { /* call == MAT_REUSE_MATRIX */
3698     M = *newmat;
3699     PetscCall(MatGetLocalSize(M, &i, NULL));
3700     PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3701     PetscCall(MatZeroEntries(M));
3702     /*
3703          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3704        rather than the slower MatSetValues().
3705     */
3706     M->was_assembled = PETSC_TRUE;
3707     M->assembled     = PETSC_FALSE;
3708   }
3709 
3710   /* (5) Set values of Msub to *newmat */
3711   PetscCall(PetscMalloc1(count, &colsub));
3712   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
3713 
3714   jj = aij->j;
3715   PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
3716   for (i = 0; i < m; i++) {
3717     row = rstart + i;
3718     nz  = ii[i + 1] - ii[i];
3719     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
3720     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
3721     jj += nz;
3722     aa += nz;
3723   }
3724   PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
3725   PetscCall(ISRestoreIndices(iscmap, &cmap));
3726 
3727   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3728   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3729 
3730   PetscCall(PetscFree(colsub));
3731 
3732   /* save Msub, iscol_sub and iscmap used in processor for next request */
3733   if (call == MAT_INITIAL_MATRIX) {
3734     *newmat = M;
3735     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub));
3736     PetscCall(MatDestroy(&Msub));
3737 
3738     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub));
3739     PetscCall(ISDestroy(&iscol_sub));
3740 
3741     PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap));
3742     PetscCall(ISDestroy(&iscmap));
3743 
3744     if (iscol_local) {
3745       PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3746       PetscCall(ISDestroy(&iscol_local));
3747     }
3748   }
3749   PetscFunctionReturn(PETSC_SUCCESS);
3750 }
3751 
3752 /*
3753     Not great since it makes two copies of the submatrix, first an SeqAIJ
3754   in local and then by concatenating the local matrices the end result.
3755   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3756 
3757   This requires a sequential iscol with all indices.
3758 */
3759 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
3760 {
3761   PetscMPIInt rank, size;
3762   PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
3763   PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3764   Mat         M, Mreuse;
3765   MatScalar  *aa, *vwork;
3766   MPI_Comm    comm;
3767   Mat_SeqAIJ *aij;
3768   PetscBool   colflag, allcolumns = PETSC_FALSE;
3769 
3770   PetscFunctionBegin;
3771   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3772   PetscCallMPI(MPI_Comm_rank(comm, &rank));
3773   PetscCallMPI(MPI_Comm_size(comm, &size));
3774 
3775   /* Check for special case: each processor gets entire matrix columns */
3776   PetscCall(ISIdentity(iscol, &colflag));
3777   PetscCall(ISGetLocalSize(iscol, &n));
3778   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3779   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3780 
3781   if (call == MAT_REUSE_MATRIX) {
3782     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
3783     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3784     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
3785   } else {
3786     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
3787   }
3788 
3789   /*
3790       m - number of local rows
3791       n - number of columns (same on all processors)
3792       rstart - first row in new global matrix generated
3793   */
3794   PetscCall(MatGetSize(Mreuse, &m, &n));
3795   PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
3796   if (call == MAT_INITIAL_MATRIX) {
3797     aij = (Mat_SeqAIJ *)(Mreuse)->data;
3798     ii  = aij->i;
3799     jj  = aij->j;
3800 
3801     /*
3802         Determine the number of non-zeros in the diagonal and off-diagonal
3803         portions of the matrix in order to do correct preallocation
3804     */
3805 
3806     /* first get start and end of "diagonal" columns */
3807     if (csize == PETSC_DECIDE) {
3808       PetscCall(ISGetSize(isrow, &mglobal));
3809       if (mglobal == n) { /* square matrix */
3810         nlocal = m;
3811       } else {
3812         nlocal = n / size + ((n % size) > rank);
3813       }
3814     } else {
3815       nlocal = csize;
3816     }
3817     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3818     rstart = rend - nlocal;
3819     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
3820 
3821     /* next, compute all the lengths */
3822     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3823     olens = dlens + m;
3824     for (i = 0; i < m; i++) {
3825       jend = ii[i + 1] - ii[i];
3826       olen = 0;
3827       dlen = 0;
3828       for (j = 0; j < jend; j++) {
3829         if (*jj < rstart || *jj >= rend) olen++;
3830         else dlen++;
3831         jj++;
3832       }
3833       olens[i] = olen;
3834       dlens[i] = dlen;
3835     }
3836     PetscCall(MatCreate(comm, &M));
3837     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
3838     PetscCall(MatSetBlockSizes(M, bs, cbs));
3839     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3840     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3841     PetscCall(PetscFree(dlens));
3842   } else {
3843     PetscInt ml, nl;
3844 
3845     M = *newmat;
3846     PetscCall(MatGetLocalSize(M, &ml, &nl));
3847     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3848     PetscCall(MatZeroEntries(M));
3849     /*
3850          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3851        rather than the slower MatSetValues().
3852     */
3853     M->was_assembled = PETSC_TRUE;
3854     M->assembled     = PETSC_FALSE;
3855   }
3856   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
3857   aij = (Mat_SeqAIJ *)(Mreuse)->data;
3858   ii  = aij->i;
3859   jj  = aij->j;
3860 
3861   /* trigger copy to CPU if needed */
3862   PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
3863   for (i = 0; i < m; i++) {
3864     row   = rstart + i;
3865     nz    = ii[i + 1] - ii[i];
3866     cwork = jj;
3867     jj    = PetscSafePointerPlusOffset(jj, nz);
3868     vwork = aa;
3869     aa    = PetscSafePointerPlusOffset(aa, nz);
3870     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
3871   }
3872   PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));
3873 
3874   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3875   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3876   *newmat = M;
3877 
3878   /* save submatrix used in processor for next request */
3879   if (call == MAT_INITIAL_MATRIX) {
3880     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
3881     PetscCall(MatDestroy(&Mreuse));
3882   }
3883   PetscFunctionReturn(PETSC_SUCCESS);
3884 }
3885 
3886 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3887 {
3888   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3889   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
3890   const PetscInt *JJ;
3891   PetscBool       nooffprocentries;
3892   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3893 
3894   PetscFunctionBegin;
3895   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);
3896 
3897   PetscCall(PetscLayoutSetUp(B->rmap));
3898   PetscCall(PetscLayoutSetUp(B->cmap));
3899   m      = B->rmap->n;
3900   cstart = B->cmap->rstart;
3901   cend   = B->cmap->rend;
3902   rstart = B->rmap->rstart;
3903 
3904   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3905 
3906   if (PetscDefined(USE_DEBUG)) {
3907     for (i = 0; i < m; i++) {
3908       nnz = Ii[i + 1] - Ii[i];
3909       JJ  = PetscSafePointerPlusOffset(J, Ii[i]);
3910       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3911       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3912       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3913     }
3914   }
3915 
3916   for (i = 0; i < m; i++) {
3917     nnz     = Ii[i + 1] - Ii[i];
3918     JJ      = PetscSafePointerPlusOffset(J, Ii[i]);
3919     nnz_max = PetscMax(nnz_max, nnz);
3920     d       = 0;
3921     for (j = 0; j < nnz; j++) {
3922       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3923     }
3924     d_nnz[i] = d;
3925     o_nnz[i] = nnz - d;
3926   }
3927   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3928   PetscCall(PetscFree2(d_nnz, o_nnz));
3929 
3930   for (i = 0; i < m; i++) {
3931     ii = i + rstart;
3932     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i]), PetscSafePointerPlusOffset(v, Ii[i]), INSERT_VALUES));
3933   }
3934   nooffprocentries    = B->nooffprocentries;
3935   B->nooffprocentries = PETSC_TRUE;
3936   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3937   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3938   B->nooffprocentries = nooffprocentries;
3939 
3940   /* count number of entries below block diagonal */
3941   PetscCall(PetscFree(Aij->ld));
3942   PetscCall(PetscCalloc1(m, &ld));
3943   Aij->ld = ld;
3944   for (i = 0; i < m; i++) {
3945     nnz = Ii[i + 1] - Ii[i];
3946     j   = 0;
3947     while (j < nnz && J[j] < cstart) j++;
3948     ld[i] = j;
3949     if (J) J += nnz;
3950   }
3951 
3952   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3953   PetscFunctionReturn(PETSC_SUCCESS);
3954 }
3955 
3956 /*@
3957   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3958   (the default parallel PETSc format).
3959 
3960   Collective
3961 
3962   Input Parameters:
3963 + B - the matrix
3964 . i - the indices into `j` for the start of each local row (indices start with zero)
3965 . j - the column indices for each local row (indices start with zero)
3966 - v - optional values in the matrix
3967 
3968   Level: developer
3969 
3970   Notes:
3971   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
3972   thus you CANNOT change the matrix entries by changing the values of `v` after you have
3973   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3974 
3975   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
3976 
3977   A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.
3978 
3979   You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.
3980 
3981   If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
3982   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
3983 
3984   The format which is used for the sparse matrix input is equivalent to a
3985   row-major ordering, i.e., for the following matrix, the input data expected is
3986   as shown
3987 .vb
3988         1 0 0
3989         2 0 3     P0
3990        -------
3991         4 5 6     P1
3992 
3993      Process0 [P0] rows_owned=[0,1]
3994         i =  {0,1,3}  [size = nrow+1  = 2+1]
3995         j =  {0,0,2}  [size = 3]
3996         v =  {1,2,3}  [size = 3]
3997 
3998      Process1 [P1] rows_owned=[2]
3999         i =  {0,3}    [size = nrow+1  = 1+1]
4000         j =  {0,1,2}  [size = 3]
4001         v =  {4,5,6}  [size = 3]
4002 .ve
4003 
4004 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
4005           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4006 @*/
4007 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
4008 {
4009   PetscFunctionBegin;
4010   PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
4011   PetscFunctionReturn(PETSC_SUCCESS);
4012 }
4013 
4014 /*@C
4015   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
4016   (the default parallel PETSc format).  For good matrix assembly performance
4017   the user should preallocate the matrix storage by setting the parameters
4018   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4019 
4020   Collective
4021 
4022   Input Parameters:
4023 + B     - the matrix
4024 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4025            (same value is used for all local rows)
4026 . d_nnz - array containing the number of nonzeros in the various rows of the
4027            DIAGONAL portion of the local submatrix (possibly different for each row)
4028            or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4029            The size of this array is equal to the number of local rows, i.e 'm'.
4030            For matrices that will be factored, you must leave room for (and set)
4031            the diagonal entry even if it is zero.
4032 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4033            submatrix (same value is used for all local rows).
4034 - o_nnz - array containing the number of nonzeros in the various rows of the
4035            OFF-DIAGONAL portion of the local submatrix (possibly different for
4036            each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4037            structure. The size of this array is equal to the number
4038            of local rows, i.e 'm'.
4039 
4040   Example Usage:
4041   Consider the following 8x8 matrix with 34 non-zero values, that is
4042   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4043   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4044   as follows
4045 
4046 .vb
4047             1  2  0  |  0  3  0  |  0  4
4048     Proc0   0  5  6  |  7  0  0  |  8  0
4049             9  0 10  | 11  0  0  | 12  0
4050     -------------------------------------
4051            13  0 14  | 15 16 17  |  0  0
4052     Proc1   0 18  0  | 19 20 21  |  0  0
4053             0  0  0  | 22 23  0  | 24  0
4054     -------------------------------------
4055     Proc2  25 26 27  |  0  0 28  | 29  0
4056            30  0  0  | 31 32 33  |  0 34
4057 .ve
4058 
4059   This can be represented as a collection of submatrices as
4060 .vb
4061       A B C
4062       D E F
4063       G H I
4064 .ve
4065 
4066   Where the submatrices A,B,C are owned by proc0, D,E,F are
4067   owned by proc1, G,H,I are owned by proc2.
4068 
4069   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4070   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4071   The 'M','N' parameters are 8,8, and have the same values on all procs.
4072 
4073   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4074   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4075   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4076   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4077   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4078   matrix, and [DF] as another `MATSEQAIJ` matrix.
4079 
4080   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4081   allocated for every row of the local diagonal submatrix, and `o_nz`
4082   storage locations are allocated for every row of the OFF-DIAGONAL submat.
4083   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros per local
4084   row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4085   In this case, the values of `d_nz`, `o_nz` are
4086 .vb
4087      proc0  d_nz = 2, o_nz = 2
4088      proc1  d_nz = 3, o_nz = 2
4089      proc2  d_nz = 1, o_nz = 4
4090 .ve
4091   We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
4092   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4093   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4094   34 values.
4095 
4096   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4097   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4098   In the above case the values for `d_nnz`, `o_nnz` are
4099 .vb
4100      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4101      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4102      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4103 .ve
4104   Here the space allocated is sum of all the above values i.e 34, and
4105   hence pre-allocation is perfect.
4106 
4107   Level: intermediate
4108 
4109   Notes:
4110   If the *_nnz parameter is given then the *_nz parameter is ignored
4111 
4112   The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4113   storage.  The stored row and column indices begin with zero.
4114   See [Sparse Matrices](sec_matsparse) for details.
4115 
4116   The parallel matrix is partitioned such that the first m0 rows belong to
4117   process 0, the next m1 rows belong to process 1, the next m2 rows belong
4118   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4119 
4120   The DIAGONAL portion of the local submatrix of a processor can be defined
4121   as the submatrix which is obtained by extracting the part corresponding to
4122   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4123   first row that belongs to the processor, r2 is the last row belonging to
4124   this processor, and c1-c2 is the range of indices of the local part of a
4125   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4126   common case of a square matrix, the row and column ranges are the same and
4127   the DIAGONAL part is also square. The remaining portion of the local
4128   submatrix (mxN) constitute the OFF-DIAGONAL portion.
4129 
4130   If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4131 
4132   You can call `MatGetInfo()` to get information on how effective the preallocation was;
4133   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4134   You can also run with the option `-info` and look for messages with the string
4135   malloc in them to see if additional memory allocation was needed.
4136 
4137 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4138           `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4139 @*/
4140 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
4141 {
4142   PetscFunctionBegin;
       /* Argument 1 must be a valid Mat object whose type has been set */
4143   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
4144   PetscValidType(B, 1);
       /* Forward all arguments unchanged to the implementation composed on B under
          "MatMPIAIJSetPreallocation_C"; NOTE(review): presumably a no-op when B's type did not
          compose one (PetscTryMethod) - confirm against the macro's documentation */
4145   PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
4146   PetscFunctionReturn(PETSC_SUCCESS);
4147 }
4148 
4149 /*@
4150   MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
4151   CSR format.
4152 
4153   Collective
4154 
4155   Input Parameters:
4156 + comm - MPI communicator
4157 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
4158 . n    - This value should be the same as the local size used in creating the
4159          x vector for the matrix-vector product $ y = Ax$. (or `PETSC_DECIDE` to have
4160          calculated if `N` is given) For square matrices n is almost always `m`.
4161 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
4162 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
4163 . i    - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4164 . j    - global column indices
4165 - a    - optional matrix values
4166 
4167   Output Parameter:
4168 . mat - the matrix
4169 
4170   Level: intermediate
4171 
4172   Notes:
4173   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4174   thus you CANNOT change the matrix entries by changing the values of `a[]` after you have
4175   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4176 
4177   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
4178 
4179   Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()`
4180 
4181   If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use
4182   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
4183 
4184   The format which is used for the sparse matrix input, is equivalent to a
4185   row-major ordering, i.e., for the following matrix, the input data expected is
4186   as shown
4187 .vb
4188         1 0 0
4189         2 0 3     P0
4190        -------
4191         4 5 6     P1
4192 
4193      Process0 [P0] rows_owned=[0,1]
4194         i =  {0,1,3}  [size = nrow+1  = 2+1]
4195         j =  {0,0,2}  [size = 3]
4196         v =  {1,2,3}  [size = 3]
4197 
4198      Process1 [P1] rows_owned=[2]
4199         i =  {0,3}    [size = nrow+1  = 1+1]
4200         j =  {0,1,2}  [size = 3]
4201         v =  {4,5,6}  [size = 3]
4202 .ve
4203 
4204 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4205           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4206 @*/
4207 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
4208 {
4209   PetscFunctionBegin;
4210   PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4211   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4212   PetscCall(MatCreate(comm, mat));
4213   PetscCall(MatSetSizes(*mat, m, n, M, N));
4214   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4215   PetscCall(MatSetType(*mat, MATMPIAIJ));
4216   PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
4217   PetscFunctionReturn(PETSC_SUCCESS);
4218 }
4219 
4220 /*@
4221   MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
4222   CSR format. Only the numerical values are updated; the other arrays must be identical to what was passed
4223   to `MatCreateMPIAIJWithArrays()`
4224 
4225   Deprecated: Use `MatUpdateMPIAIJWithArray()`
4226 
4227   Collective
4228 
4229   Input Parameters:
4230 + mat - the matrix
4231 . m   - number of local rows (Cannot be `PETSC_DECIDE`)
4232 . n   - This value should be the same as the local size used in creating the
4233        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4234        calculated if N is given) For square matrices n is almost always m.
4235 . M   - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4236 . N   - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4237 . Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4238 . J   - column indices
4239 - v   - matrix values
4240 
4241   Level: deprecated
4242 
4243 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4244           `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4245 @*/
4246 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
4247 {
4248   PetscInt        nnz, i;
4249   PetscBool       nooffprocentries;
4250   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4251   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4252   PetscScalar    *ad, *ao;
4253   PetscInt        ldi, Iii, md;
4254   const PetscInt *Adi = Ad->i;
4255   PetscInt       *ld  = Aij->ld;
4256 
4257   PetscFunctionBegin;
4258   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4259   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4260   PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4261   PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4262 
4263   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4264   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4265 
4266   for (i = 0; i < m; i++) {
4267     if (PetscDefined(USE_DEBUG)) {
4268       for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) {
4269         PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i);
4270         PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i);
4271       }
4272     }
4273     nnz = Ii[i + 1] - Ii[i];
4274     Iii = Ii[i];
4275     ldi = ld[i];
4276     md  = Adi[i + 1] - Adi[i];
4277     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4278     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4279     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4280     ad += md;
4281     ao += nnz - md;
4282   }
4283   nooffprocentries      = mat->nooffprocentries;
4284   mat->nooffprocentries = PETSC_TRUE;
4285   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4286   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4287   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4288   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4289   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4290   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4291   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4292   mat->nooffprocentries = nooffprocentries;
4293   PetscFunctionReturn(PETSC_SUCCESS);
4294 }
4295 
4296 /*@
4297   MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values
4298 
4299   Collective
4300 
4301   Input Parameters:
4302 + mat - the matrix
4303 - v   - matrix values, stored by row
4304 
4305   Level: intermediate
4306 
4307   Notes:
4308   The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4309 
4310   The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly
4311 
4312 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4313           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4314 @*/
4315 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4316 {
4317   PetscInt        nnz, i, m;
4318   PetscBool       nooffprocentries;
4319   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4320   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4321   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4322   PetscScalar    *ad, *ao;
4323   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4324   PetscInt        ldi, Iii, md;
4325   PetscInt       *ld = Aij->ld;
4326 
4327   PetscFunctionBegin;
4328   m = mat->rmap->n;
4329 
4330   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4331   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4332   Iii = 0;
4333   for (i = 0; i < m; i++) {
4334     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4335     ldi = ld[i];
4336     md  = Adi[i + 1] - Adi[i];
4337     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4338     ad += md;
4339     if (ao) {
4340       PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4341       PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4342       ao += nnz - md;
4343     }
4344     Iii += nnz;
4345   }
4346   nooffprocentries      = mat->nooffprocentries;
4347   mat->nooffprocentries = PETSC_TRUE;
4348   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4349   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4350   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4351   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4352   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4353   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4354   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4355   mat->nooffprocentries = nooffprocentries;
4356   PetscFunctionReturn(PETSC_SUCCESS);
4357 }
4358 
4359 /*@C
4360   MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4361   (the default parallel PETSc format).  For good matrix assembly performance
4362   the user should preallocate the matrix storage by setting the parameters
4363   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4364 
4365   Collective
4366 
4367   Input Parameters:
4368 + comm  - MPI communicator
4369 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4370           This value should be the same as the local size used in creating the
4371           y vector for the matrix-vector product y = Ax.
4372 . n     - This value should be the same as the local size used in creating the
4373           x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4374           calculated if N is given) For square matrices n is almost always m.
4375 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4376 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4377 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4378           (same value is used for all local rows)
4379 . d_nnz - array containing the number of nonzeros in the various rows of the
4380           DIAGONAL portion of the local submatrix (possibly different for each row)
4381           or `NULL`, if `d_nz` is used to specify the nonzero structure.
4382           The size of this array is equal to the number of local rows, i.e 'm'.
4383 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4384           submatrix (same value is used for all local rows).
4385 - o_nnz - array containing the number of nonzeros in the various rows of the
4386           OFF-DIAGONAL portion of the local submatrix (possibly different for
4387           each row) or `NULL`, if `o_nz` is used to specify the nonzero
4388           structure. The size of this array is equal to the number
4389           of local rows, i.e 'm'.
4390 
4391   Output Parameter:
4392 . A - the matrix
4393 
4394   Options Database Keys:
4395 + -mat_no_inode                     - Do not use inodes
4396 . -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
4397 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4398                                       See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter`
4399                                       to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4400 
4401   Level: intermediate
4402 
4403   Notes:
4404   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4405   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4406   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4407 
4408   If the *_nnz parameter is given then the *_nz parameter is ignored
4409 
4410   The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4411   processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4412   storage requirements for this matrix.
4413 
4414   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one
4415   processor then it must be used on all processors that share the object for
4416   that argument.
4417 
4418   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
4419   `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
4420 
4421   The user MUST specify either the local or global matrix dimensions
4422   (possibly both).
4423 
4424   The parallel matrix is partitioned across processors such that the
4425   first `m0` rows belong to process 0, the next `m1` rows belong to
4426   process 1, the next `m2` rows belong to process 2, etc., where
4427   `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores
4428   values corresponding to [m x N] submatrix.
4429 
4430   The columns are logically partitioned with the n0 columns belonging
4431   to 0th partition, the next n1 columns belonging to the next
4432   partition etc.. where n0,n1,n2... are the input parameter 'n'.
4433 
4434   The DIAGONAL portion of the local submatrix on any given processor
4435   is the submatrix corresponding to the rows and columns m,n
4436   corresponding to the given processor. i.e diagonal matrix on
4437   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4438   etc. The remaining portion of the local submatrix [m x (N-n)]
4439   constitute the OFF-DIAGONAL portion. The example below better
4440   illustrates this concept.
4441 
4442   For a square global matrix we define each processor's diagonal portion
4443   to be its local rows and the corresponding columns (a square submatrix);
4444   each processor's off-diagonal portion encompasses the remainder of the
4445   local matrix (a rectangular submatrix).
4446 
4447   If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
4448 
4449   When calling this routine with a single process communicator, a matrix of
4450   type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4451   type of communicator, use the construction mechanism
4452 .vb
4453   MatCreate(..., &A);
4454   MatSetType(A, MATMPIAIJ);
4455   MatSetSizes(A, m, n, M, N);
4456   MatMPIAIJSetPreallocation(A, ...);
4457 .ve
4458 
4459   By default, this format uses inodes (identical nodes) when possible.
4460   We search for consecutive rows with the same nonzero structure, thereby
4461   reusing matrix information to achieve increased efficiency.
4462 
4463   Example Usage:
4464   Consider the following 8x8 matrix with 34 non-zero values, that is
4465   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4466   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4467   as follows
4468 
4469 .vb
4470             1  2  0  |  0  3  0  |  0  4
4471     Proc0   0  5  6  |  7  0  0  |  8  0
4472             9  0 10  | 11  0  0  | 12  0
4473     -------------------------------------
4474            13  0 14  | 15 16 17  |  0  0
4475     Proc1   0 18  0  | 19 20 21  |  0  0
4476             0  0  0  | 22 23  0  | 24  0
4477     -------------------------------------
4478     Proc2  25 26 27  |  0  0 28  | 29  0
4479            30  0  0  | 31 32 33  |  0 34
4480 .ve
4481 
4482   This can be represented as a collection of submatrices as
4483 
4484 .vb
4485       A B C
4486       D E F
4487       G H I
4488 .ve
4489 
4490   Where the submatrices A,B,C are owned by proc0, D,E,F are
4491   owned by proc1, G,H,I are owned by proc2.
4492 
4493   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4494   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4495   The 'M','N' parameters are 8,8, and have the same values on all procs.
4496 
4497   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4498   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4499   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4500   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4501   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4502   matrix, and [DF] as another SeqAIJ matrix.
4503 
4504   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4505   allocated for every row of the local diagonal submatrix, and `o_nz`
4506   storage locations are allocated for every row of the OFF-DIAGONAL submat.
4507   One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
4508   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4509   In this case, the values of `d_nz`,`o_nz` are
4510 .vb
4511      proc0  d_nz = 2, o_nz = 2
4512      proc1  d_nz = 3, o_nz = 2
4513      proc2  d_nz = 1, o_nz = 4
4514 .ve
4515   We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
4516   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4517   for proc2. i.e we are using 12+15+10=37 storage locations to store
4518   34 values.
4519 
4520   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4521   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4522   In the above case the values for d_nnz,o_nnz are
4523 .vb
4524      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4525      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4526      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4527 .ve
4528   Here the space allocated is sum of all the above values i.e 34, and
4529   hence pre-allocation is perfect.
4530 
4531 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4532           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`,
4533           `MatGetOwnershipRangesColumn()`, `PetscLayout`
4534 @*/
4535 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4536 {
4537   PetscMPIInt size;
4538 
4539   PetscFunctionBegin;
4540   PetscCall(MatCreate(comm, A));
4541   PetscCall(MatSetSizes(*A, m, n, M, N));
4542   PetscCallMPI(MPI_Comm_size(comm, &size));
4543   if (size > 1) {
4544     PetscCall(MatSetType(*A, MATMPIAIJ));
4545     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4546   } else {
4547     PetscCall(MatSetType(*A, MATSEQAIJ));
4548     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4549   }
4550   PetscFunctionReturn(PETSC_SUCCESS);
4551 }
4552 
4553 /*MC
4554     MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix
4555 
4556     Synopsis:
4557     MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4558 
4559     Not Collective
4560 
4561     Input Parameter:
4562 .   A - the `MATMPIAIJ` matrix
4563 
4564     Output Parameters:
4565 +   Ad - the diagonal portion of the matrix
4566 .   Ao - the off-diagonal portion of the matrix
4567 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4568 -   ierr - error code
4569 
4570      Level: advanced
4571 
4572     Note:
4573     Use  `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4574 
4575 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
4576 M*/
4577 
4578 /*MC
4579     MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4580 
4581     Synopsis:
4582     MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4583 
4584     Not Collective
4585 
4586     Input Parameters:
4587 +   A - the `MATMPIAIJ` matrix
4588 .   Ad - the diagonal portion of the matrix
4589 .   Ao - the off-diagonal portion of the matrix
4590 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4591 -   ierr - error code
4592 
4593      Level: advanced
4594 
4595 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
4596 M*/
4597 
4598 /*@C
4599   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4600 
4601   Not Collective
4602 
4603   Input Parameter:
4604 . A - The `MATMPIAIJ` matrix
4605 
4606   Output Parameters:
4607 + Ad     - The local diagonal block as a `MATSEQAIJ` matrix
4608 . Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
4609 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4610 
4611   Level: intermediate
4612 
4613   Note:
4614   The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
4615   in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns of `Ao` are in [0, Nco), where Nco is
4616   the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
4617   local column numbers to global column numbers in the original matrix.
4618 
4619   Fortran Notes:
4620   `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()`
4621 
4622 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
4623 @*/
4624 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4625 {
4626   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4627   PetscBool   flg;
4628 
4629   PetscFunctionBegin;
4630   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4631   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4632   if (Ad) *Ad = a->A;
4633   if (Ao) *Ao = a->B;
4634   if (colmap) *colmap = a->garray;
4635   PetscFunctionReturn(PETSC_SUCCESS);
4636 }
4637 
4638 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
4639 {
4640   PetscInt     m, N, i, rstart, nnz, Ii;
4641   PetscInt    *indx;
4642   PetscScalar *values;
4643   MatType      rootType;
4644 
4645   PetscFunctionBegin;
4646   PetscCall(MatGetSize(inmat, &m, &N));
4647   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4648     PetscInt *dnz, *onz, sum, bs, cbs;
4649 
4650     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
4651     /* Check sum(n) = N */
4652     PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
4653     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
4654 
4655     PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
4656     rstart -= m;
4657 
4658     MatPreallocateBegin(comm, m, n, dnz, onz);
4659     for (i = 0; i < m; i++) {
4660       PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4661       PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
4662       PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4663     }
4664 
4665     PetscCall(MatCreate(comm, outmat));
4666     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
4667     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
4668     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
4669     PetscCall(MatGetRootType_Private(inmat, &rootType));
4670     PetscCall(MatSetType(*outmat, rootType));
4671     PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
4672     PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
4673     MatPreallocateEnd(dnz, onz);
4674     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
4675   }
4676 
4677   /* numeric phase */
4678   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
4679   for (i = 0; i < m; i++) {
4680     PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4681     Ii = i + rstart;
4682     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
4683     PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4684   }
4685   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
4686   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
4687   PetscFunctionReturn(PETSC_SUCCESS);
4688 }
4689 
4690 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4691 {
4692   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4693 
4694   PetscFunctionBegin;
4695   if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
4696   PetscCall(PetscFree(merge->id_r));
4697   PetscCall(PetscFree(merge->len_s));
4698   PetscCall(PetscFree(merge->len_r));
4699   PetscCall(PetscFree(merge->bi));
4700   PetscCall(PetscFree(merge->bj));
4701   PetscCall(PetscFree(merge->buf_ri[0]));
4702   PetscCall(PetscFree(merge->buf_ri));
4703   PetscCall(PetscFree(merge->buf_rj[0]));
4704   PetscCall(PetscFree(merge->buf_rj));
4705   PetscCall(PetscFree(merge->coi));
4706   PetscCall(PetscFree(merge->coj));
4707   PetscCall(PetscFree(merge->owners_co));
4708   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4709   PetscCall(PetscFree(merge));
4710   PetscFunctionReturn(PETSC_SUCCESS);
4711 }
4712 
4713 #include <../src/mat/utils/freespace.h>
4714 #include <petscbt.h>
4715 
4716 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
4717 {
4718   MPI_Comm             comm;
4719   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4720   PetscMPIInt          size, rank, taga, *len_s;
4721   PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj;
4722   PetscInt             proc, m;
4723   PetscInt           **buf_ri, **buf_rj;
4724   PetscInt             k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
4725   PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
4726   MPI_Request         *s_waits, *r_waits;
4727   MPI_Status          *status;
4728   const MatScalar     *aa, *a_a;
4729   MatScalar          **abuf_r, *ba_i;
4730   Mat_Merge_SeqsToMPI *merge;
4731   PetscContainer       container;
4732 
4733   PetscFunctionBegin;
4734   PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
4735   PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));
4736 
4737   PetscCallMPI(MPI_Comm_size(comm, &size));
4738   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4739 
4740   PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
4741   PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4742   PetscCall(PetscContainerGetPointer(container, (void **)&merge));
4743   PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
4744   aa = a_a;
4745 
4746   bi     = merge->bi;
4747   bj     = merge->bj;
4748   buf_ri = merge->buf_ri;
4749   buf_rj = merge->buf_rj;
4750 
4751   PetscCall(PetscMalloc1(size, &status));
4752   owners = merge->rowmap->range;
4753   len_s  = merge->len_s;
4754 
4755   /* send and recv matrix values */
4756   PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
4757   PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));
4758 
4759   PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
4760   for (proc = 0, k = 0; proc < size; proc++) {
4761     if (!len_s[proc]) continue;
4762     i = owners[proc];
4763     PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
4764     k++;
4765   }
4766 
4767   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
4768   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
4769   PetscCall(PetscFree(status));
4770 
4771   PetscCall(PetscFree(s_waits));
4772   PetscCall(PetscFree(r_waits));
4773 
4774   /* insert mat values of mpimat */
4775   PetscCall(PetscMalloc1(N, &ba_i));
4776   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4777 
4778   for (k = 0; k < merge->nrecv; k++) {
4779     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4780     nrows       = *buf_ri_k[k];
4781     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4782     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4783   }
4784 
4785   /* set values of ba */
4786   m = merge->rowmap->n;
4787   for (i = 0; i < m; i++) {
4788     arow = owners[rank] + i;
4789     bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
4790     bnzi = bi[i + 1] - bi[i];
4791     PetscCall(PetscArrayzero(ba_i, bnzi));
4792 
4793     /* add local non-zero vals of this proc's seqmat into ba */
4794     anzi   = ai[arow + 1] - ai[arow];
4795     aj     = a->j + ai[arow];
4796     aa     = a_a + ai[arow];
4797     nextaj = 0;
4798     for (j = 0; nextaj < anzi; j++) {
4799       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4800         ba_i[j] += aa[nextaj++];
4801       }
4802     }
4803 
4804     /* add received vals into ba */
4805     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4806       /* i-th row */
4807       if (i == *nextrow[k]) {
4808         anzi   = *(nextai[k] + 1) - *nextai[k];
4809         aj     = buf_rj[k] + *nextai[k];
4810         aa     = abuf_r[k] + *nextai[k];
4811         nextaj = 0;
4812         for (j = 0; nextaj < anzi; j++) {
4813           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4814             ba_i[j] += aa[nextaj++];
4815           }
4816         }
4817         nextrow[k]++;
4818         nextai[k]++;
4819       }
4820     }
4821     PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
4822   }
4823   PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
4824   PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
4825   PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));
4826 
4827   PetscCall(PetscFree(abuf_r[0]));
4828   PetscCall(PetscFree(abuf_r));
4829   PetscCall(PetscFree(ba_i));
4830   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
4831   PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
4832   PetscFunctionReturn(PETSC_SUCCESS);
4833 }
4834 
4835 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4836 {
4837   Mat                  B_mpi;
4838   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4839   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4840   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4841   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4842   PetscInt             len, proc, *dnz, *onz, bs, cbs;
4843   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4844   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4845   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4846   MPI_Status          *status;
4847   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4848   PetscBT              lnkbt;
4849   Mat_Merge_SeqsToMPI *merge;
4850   PetscContainer       container;
4851 
4852   PetscFunctionBegin;
4853   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4854 
4855   /* make sure it is a PETSc comm */
4856   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4857   PetscCallMPI(MPI_Comm_size(comm, &size));
4858   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4859 
4860   PetscCall(PetscNew(&merge));
4861   PetscCall(PetscMalloc1(size, &status));
4862 
4863   /* determine row ownership */
4864   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4865   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4866   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4867   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4868   PetscCall(PetscLayoutSetUp(merge->rowmap));
4869   PetscCall(PetscMalloc1(size, &len_si));
4870   PetscCall(PetscMalloc1(size, &merge->len_s));
4871 
4872   m      = merge->rowmap->n;
4873   owners = merge->rowmap->range;
4874 
4875   /* determine the number of messages to send, their lengths */
4876   len_s = merge->len_s;
4877 
4878   len          = 0; /* length of buf_si[] */
4879   merge->nsend = 0;
4880   for (proc = 0; proc < size; proc++) {
4881     len_si[proc] = 0;
4882     if (proc == rank) {
4883       len_s[proc] = 0;
4884     } else {
4885       len_si[proc] = owners[proc + 1] - owners[proc] + 1;
4886       len_s[proc]  = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4887     }
4888     if (len_s[proc]) {
4889       merge->nsend++;
4890       nrows = 0;
4891       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4892         if (ai[i + 1] > ai[i]) nrows++;
4893       }
4894       len_si[proc] = 2 * (nrows + 1);
4895       len += len_si[proc];
4896     }
4897   }
4898 
4899   /* determine the number and length of messages to receive for ij-structure */
4900   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4901   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4902 
4903   /* post the Irecv of j-structure */
4904   PetscCall(PetscCommGetNewTag(comm, &tagj));
4905   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4906 
4907   /* post the Isend of j-structure */
4908   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4909 
4910   for (proc = 0, k = 0; proc < size; proc++) {
4911     if (!len_s[proc]) continue;
4912     i = owners[proc];
4913     PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4914     k++;
4915   }
4916 
4917   /* receives and sends of j-structure are complete */
4918   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4919   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4920 
4921   /* send and recv i-structure */
4922   PetscCall(PetscCommGetNewTag(comm, &tagi));
4923   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4924 
4925   PetscCall(PetscMalloc1(len + 1, &buf_s));
4926   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4927   for (proc = 0, k = 0; proc < size; proc++) {
4928     if (!len_s[proc]) continue;
4929     /* form outgoing message for i-structure:
4930          buf_si[0]:                 nrows to be sent
4931                [1:nrows]:           row index (global)
4932                [nrows+1:2*nrows+1]: i-structure index
4933     */
4934     nrows       = len_si[proc] / 2 - 1;
4935     buf_si_i    = buf_si + nrows + 1;
4936     buf_si[0]   = nrows;
4937     buf_si_i[0] = 0;
4938     nrows       = 0;
4939     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4940       anzi = ai[i + 1] - ai[i];
4941       if (anzi) {
4942         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4943         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4944         nrows++;
4945       }
4946     }
4947     PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4948     k++;
4949     buf_si += len_si[proc];
4950   }
4951 
4952   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4953   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4954 
4955   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4956   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4957 
4958   PetscCall(PetscFree(len_si));
4959   PetscCall(PetscFree(len_ri));
4960   PetscCall(PetscFree(rj_waits));
4961   PetscCall(PetscFree2(si_waits, sj_waits));
4962   PetscCall(PetscFree(ri_waits));
4963   PetscCall(PetscFree(buf_s));
4964   PetscCall(PetscFree(status));
4965 
4966   /* compute a local seq matrix in each processor */
4967   /* allocate bi array and free space for accumulating nonzero column info */
4968   PetscCall(PetscMalloc1(m + 1, &bi));
4969   bi[0] = 0;
4970 
4971   /* create and initialize a linked list */
4972   nlnk = N + 1;
4973   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
4974 
4975   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4976   len = ai[owners[rank + 1]] - ai[owners[rank]];
4977   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
4978 
4979   current_space = free_space;
4980 
4981   /* determine symbolic info for each local row */
4982   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4983 
4984   for (k = 0; k < merge->nrecv; k++) {
4985     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4986     nrows       = *buf_ri_k[k];
4987     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4988     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4989   }
4990 
4991   MatPreallocateBegin(comm, m, n, dnz, onz);
4992   len = 0;
4993   for (i = 0; i < m; i++) {
4994     bnzi = 0;
4995     /* add local non-zero cols of this proc's seqmat into lnk */
4996     arow = owners[rank] + i;
4997     anzi = ai[arow + 1] - ai[arow];
4998     aj   = a->j + ai[arow];
4999     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5000     bnzi += nlnk;
5001     /* add received col data into lnk */
5002     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
5003       if (i == *nextrow[k]) {            /* i-th row */
5004         anzi = *(nextai[k] + 1) - *nextai[k];
5005         aj   = buf_rj[k] + *nextai[k];
5006         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5007         bnzi += nlnk;
5008         nextrow[k]++;
5009         nextai[k]++;
5010       }
5011     }
5012     if (len < bnzi) len = bnzi; /* =max(bnzi) */
5013 
5014     /* if free space is not available, make more free space */
5015     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
5016     /* copy data into free space, then initialize lnk */
5017     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
5018     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
5019 
5020     current_space->array += bnzi;
5021     current_space->local_used += bnzi;
5022     current_space->local_remaining -= bnzi;
5023 
5024     bi[i + 1] = bi[i] + bnzi;
5025   }
5026 
5027   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
5028 
5029   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
5030   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
5031   PetscCall(PetscLLDestroy(lnk, lnkbt));
5032 
5033   /* create symbolic parallel matrix B_mpi */
5034   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
5035   PetscCall(MatCreate(comm, &B_mpi));
5036   if (n == PETSC_DECIDE) {
5037     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
5038   } else {
5039     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
5040   }
5041   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
5042   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
5043   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5044   MatPreallocateEnd(dnz, onz);
5045   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
5046 
5047   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5048   B_mpi->assembled = PETSC_FALSE;
5049   merge->bi        = bi;
5050   merge->bj        = bj;
5051   merge->buf_ri    = buf_ri;
5052   merge->buf_rj    = buf_rj;
5053   merge->coi       = NULL;
5054   merge->coj       = NULL;
5055   merge->owners_co = NULL;
5056 
5057   PetscCall(PetscCommDestroy(&comm));
5058 
5059   /* attach the supporting struct to B_mpi for reuse */
5060   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5061   PetscCall(PetscContainerSetPointer(container, merge));
5062   PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5063   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5064   PetscCall(PetscContainerDestroy(&container));
5065   *mpimat = B_mpi;
5066 
5067   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5068   PetscFunctionReturn(PETSC_SUCCESS);
5069 }
5070 
5071 /*@
5072   MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5073   matrices from each processor
5074 
5075   Collective
5076 
5077   Input Parameters:
5078 + comm   - the communicators the parallel matrix will live on
5079 . seqmat - the input sequential matrices
5080 . m      - number of local rows (or `PETSC_DECIDE`)
5081 . n      - number of local columns (or `PETSC_DECIDE`)
5082 - scall  - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5083 
5084   Output Parameter:
5085 . mpimat - the parallel matrix generated
5086 
5087   Level: advanced
5088 
5089   Note:
5090   The dimensions of the sequential matrix in each processor MUST be the same.
5091   The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5092   destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`.
5093 
5094 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
5095 @*/
5096 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5097 {
5098   PetscMPIInt size;
5099 
5100   PetscFunctionBegin;
5101   PetscCallMPI(MPI_Comm_size(comm, &size));
5102   if (size == 1) {
5103     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5104     if (scall == MAT_INITIAL_MATRIX) {
5105       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5106     } else {
5107       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5108     }
5109     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5110     PetscFunctionReturn(PETSC_SUCCESS);
5111   }
5112   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5113   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5114   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5115   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5116   PetscFunctionReturn(PETSC_SUCCESS);
5117 }
5118 
5119 /*@
5120   MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix.
5121 
5122   Not Collective
5123 
5124   Input Parameter:
5125 . A - the matrix
5126 
5127   Output Parameter:
5128 . A_loc - the local sequential matrix generated
5129 
5130   Level: developer
5131 
5132   Notes:
5133   The matrix is created by taking `A`'s local rows and putting them into a sequential matrix
5134   with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and
5135   `n` is the global column count obtained with `MatGetSize()`
5136 
5137   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5138 
5139   For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.
5140 
5141   Destroy the matrix with `MatDestroy()`
5142 
5143 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5144 @*/
5145 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5146 {
5147   PetscBool mpi;
5148 
5149   PetscFunctionBegin;
5150   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5151   if (mpi) {
5152     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5153   } else {
5154     *A_loc = A;
5155     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5156   }
5157   PetscFunctionReturn(PETSC_SUCCESS);
5158 }
5159 
5160 /*@
5161   MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.
5162 
5163   Not Collective
5164 
5165   Input Parameters:
5166 + A     - the matrix
5167 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5168 
5169   Output Parameter:
5170 . A_loc - the local sequential matrix generated
5171 
5172   Level: developer
5173 
5174   Notes:
5175   The matrix is created by taking all `A`'s local rows and putting them into a sequential
5176   matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with
5177   `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.
5178 
5179   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5180 
5181   When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
5182   with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
5183   then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
5184   and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.
5185 
5186 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5187 @*/
5188 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
5189 {
5190   Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
5191   Mat_SeqAIJ        *mat, *a, *b;
5192   PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
5193   const PetscScalar *aa, *ba, *aav, *bav;
5194   PetscScalar       *ca, *cam;
5195   PetscMPIInt        size;
5196   PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
5197   PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
5198   PetscBool          match;
5199 
5200   PetscFunctionBegin;
5201   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
5202   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5203   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5204   if (size == 1) {
5205     if (scall == MAT_INITIAL_MATRIX) {
5206       PetscCall(PetscObjectReference((PetscObject)mpimat->A));
5207       *A_loc = mpimat->A;
5208     } else if (scall == MAT_REUSE_MATRIX) {
5209       PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
5210     }
5211     PetscFunctionReturn(PETSC_SUCCESS);
5212   }
5213 
5214   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5215   a  = (Mat_SeqAIJ *)mpimat->A->data;
5216   b  = (Mat_SeqAIJ *)mpimat->B->data;
5217   ai = a->i;
5218   aj = a->j;
5219   bi = b->i;
5220   bj = b->j;
5221   PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
5222   PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
5223   aa = aav;
5224   ba = bav;
5225   if (scall == MAT_INITIAL_MATRIX) {
5226     PetscCall(PetscMalloc1(1 + am, &ci));
5227     ci[0] = 0;
5228     for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
5229     PetscCall(PetscMalloc1(1 + ci[am], &cj));
5230     PetscCall(PetscMalloc1(1 + ci[am], &ca));
5231     k = 0;
5232     for (i = 0; i < am; i++) {
5233       ncols_o = bi[i + 1] - bi[i];
5234       ncols_d = ai[i + 1] - ai[i];
5235       /* off-diagonal portion of A */
5236       for (jo = 0; jo < ncols_o; jo++) {
5237         col = cmap[*bj];
5238         if (col >= cstart) break;
5239         cj[k] = col;
5240         bj++;
5241         ca[k++] = *ba++;
5242       }
5243       /* diagonal portion of A */
5244       for (j = 0; j < ncols_d; j++) {
5245         cj[k]   = cstart + *aj++;
5246         ca[k++] = *aa++;
5247       }
5248       /* off-diagonal portion of A */
5249       for (j = jo; j < ncols_o; j++) {
5250         cj[k]   = cmap[*bj++];
5251         ca[k++] = *ba++;
5252       }
5253     }
5254     /* put together the new matrix */
5255     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
5256     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5257     /* Since these are PETSc arrays, change flags to free them as necessary. */
5258     mat          = (Mat_SeqAIJ *)(*A_loc)->data;
5259     mat->free_a  = PETSC_TRUE;
5260     mat->free_ij = PETSC_TRUE;
5261     mat->nonew   = 0;
5262   } else if (scall == MAT_REUSE_MATRIX) {
5263     mat = (Mat_SeqAIJ *)(*A_loc)->data;
5264     ci  = mat->i;
5265     cj  = mat->j;
5266     PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
5267     for (i = 0; i < am; i++) {
5268       /* off-diagonal portion of A */
5269       ncols_o = bi[i + 1] - bi[i];
5270       for (jo = 0; jo < ncols_o; jo++) {
5271         col = cmap[*bj];
5272         if (col >= cstart) break;
5273         *cam++ = *ba++;
5274         bj++;
5275       }
5276       /* diagonal portion of A */
5277       ncols_d = ai[i + 1] - ai[i];
5278       for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
5279       /* off-diagonal portion of A */
5280       for (j = jo; j < ncols_o; j++) {
5281         *cam++ = *ba++;
5282         bj++;
5283       }
5284     }
5285     PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
5286   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5287   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
5288   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
5289   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5290   PetscFunctionReturn(PETSC_SUCCESS);
5291 }
5292 
5293 /*@
5294   MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5295   mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part
5296 
5297   Not Collective
5298 
5299   Input Parameters:
5300 + A     - the matrix
5301 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5302 
5303   Output Parameters:
5304 + glob  - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5305 - A_loc - the local sequential matrix generated
5306 
5307   Level: developer
5308 
5309   Note:
5310   This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal
5311   part, then those associated with the off-diagonal part (in its local ordering)
5312 
5313 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5314 @*/
5315 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5316 {
5317   Mat             Ao, Ad;
5318   const PetscInt *cmap;
5319   PetscMPIInt     size;
5320   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5321 
5322   PetscFunctionBegin;
5323   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5324   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5325   if (size == 1) {
5326     if (scall == MAT_INITIAL_MATRIX) {
5327       PetscCall(PetscObjectReference((PetscObject)Ad));
5328       *A_loc = Ad;
5329     } else if (scall == MAT_REUSE_MATRIX) {
5330       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5331     }
5332     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5333     PetscFunctionReturn(PETSC_SUCCESS);
5334   }
5335   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5336   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5337   if (f) {
5338     PetscCall((*f)(A, scall, glob, A_loc));
5339   } else {
5340     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5341     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5342     Mat_SeqAIJ        *c;
5343     PetscInt          *ai = a->i, *aj = a->j;
5344     PetscInt          *bi = b->i, *bj = b->j;
5345     PetscInt          *ci, *cj;
5346     const PetscScalar *aa, *ba;
5347     PetscScalar       *ca;
5348     PetscInt           i, j, am, dn, on;
5349 
5350     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5351     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5352     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5353     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5354     if (scall == MAT_INITIAL_MATRIX) {
5355       PetscInt k;
5356       PetscCall(PetscMalloc1(1 + am, &ci));
5357       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5358       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5359       ci[0] = 0;
5360       for (i = 0, k = 0; i < am; i++) {
5361         const PetscInt ncols_o = bi[i + 1] - bi[i];
5362         const PetscInt ncols_d = ai[i + 1] - ai[i];
5363         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5364         /* diagonal portion of A */
5365         for (j = 0; j < ncols_d; j++, k++) {
5366           cj[k] = *aj++;
5367           ca[k] = *aa++;
5368         }
5369         /* off-diagonal portion of A */
5370         for (j = 0; j < ncols_o; j++, k++) {
5371           cj[k] = dn + *bj++;
5372           ca[k] = *ba++;
5373         }
5374       }
5375       /* put together the new matrix */
5376       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5377       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5378       /* Since these are PETSc arrays, change flags to free them as necessary. */
5379       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5380       c->free_a  = PETSC_TRUE;
5381       c->free_ij = PETSC_TRUE;
5382       c->nonew   = 0;
5383       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5384     } else if (scall == MAT_REUSE_MATRIX) {
5385       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5386       for (i = 0; i < am; i++) {
5387         const PetscInt ncols_d = ai[i + 1] - ai[i];
5388         const PetscInt ncols_o = bi[i + 1] - bi[i];
5389         /* diagonal portion of A */
5390         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5391         /* off-diagonal portion of A */
5392         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5393       }
5394       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5395     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5396     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5397     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa));
5398     if (glob) {
5399       PetscInt cst, *gidx;
5400 
5401       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5402       PetscCall(PetscMalloc1(dn + on, &gidx));
5403       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5404       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5405       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5406     }
5407   }
5408   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5409   PetscFunctionReturn(PETSC_SUCCESS);
5410 }
5411 
5412 /*@C
5413   MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5414 
5415   Not Collective
5416 
5417   Input Parameters:
5418 + A     - the matrix
5419 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5420 . row   - index set of rows to extract (or `NULL`)
5421 - col   - index set of columns to extract (or `NULL`)
5422 
5423   Output Parameter:
5424 . A_loc - the local sequential matrix generated
5425 
5426   Level: developer
5427 
5428 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5429 @*/
5430 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
5431 {
5432   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5433   PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
5434   IS          isrowa, iscola;
5435   Mat        *aloc;
5436   PetscBool   match;
5437 
5438   PetscFunctionBegin;
5439   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
5440   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5441   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5442   if (!row) {
5443     start = A->rmap->rstart;
5444     end   = A->rmap->rend;
5445     PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
5446   } else {
5447     isrowa = *row;
5448   }
5449   if (!col) {
5450     start = A->cmap->rstart;
5451     cmap  = a->garray;
5452     nzA   = a->A->cmap->n;
5453     nzB   = a->B->cmap->n;
5454     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5455     ncols = 0;
5456     for (i = 0; i < nzB; i++) {
5457       if (cmap[i] < start) idx[ncols++] = cmap[i];
5458       else break;
5459     }
5460     imark = i;
5461     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
5462     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
5463     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
5464   } else {
5465     iscola = *col;
5466   }
5467   if (scall != MAT_INITIAL_MATRIX) {
5468     PetscCall(PetscMalloc1(1, &aloc));
5469     aloc[0] = *A_loc;
5470   }
5471   PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
5472   if (!col) { /* attach global id of condensed columns */
5473     PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
5474   }
5475   *A_loc = aloc[0];
5476   PetscCall(PetscFree(aloc));
5477   if (!row) PetscCall(ISDestroy(&isrowa));
5478   if (!col) PetscCall(ISDestroy(&iscola));
5479   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5480   PetscFunctionReturn(PETSC_SUCCESS);
5481 }
5482 
5483 /*
5484  * Create a sequential AIJ matrix P_oth from the rows of P listed in `rows`: every nonzero of a requested row,
5485  * from both the diagonal and the off-diagonal block of P, is fetched. A row may be local or remote. The routine
5486  * is designed to be scalable in memory so that nothing is based on a global size. The two PetscSF objects that
5487  * move the entries are composed with P_oth under the names "diagsf" and "offdiagsf". */
5488 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
5489 {
5490   Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
5491   Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth;
5492   PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
5493   PetscMPIInt            owner;
5494   PetscSFNode           *iremote, *oiremote;
5495   const PetscInt        *lrowindices;
5496   PetscSF                sf, osf;
5497   PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
5498   PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
5499   MPI_Comm               comm;
5500   ISLocalToGlobalMapping mapping;
5501   const PetscScalar     *pd_a, *po_a;
5502 
5503   PetscFunctionBegin;
5504   PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
5505   /* plocalsize is the number of roots
5506    * nrows is the number of leaves
5507    * */
5508   PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
5509   PetscCall(ISGetLocalSize(rows, &nrows));
5510   PetscCall(PetscCalloc1(nrows, &iremote));
5511   PetscCall(ISGetIndices(rows, &lrowindices));
5512   for (i = 0; i < nrows; i++) {
5513     /* Find a remote index and an owner for a row
5514      * The row could be local or remote
5515      * */
5516     owner = 0;
5517     lidx  = 0;
5518     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
5519     iremote[i].index = lidx;
5520     iremote[i].rank  = owner;
5521   }
5522   /* Create SF to communicate how many nonzero columns each requested row has */
5523   PetscCall(PetscSFCreate(comm, &sf));
5524   /* SF will figure out the number of nonzero columns for each row, and their
5525    * offsets
5526    * */
5527   PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); /* iremote is passed with PETSC_OWN_POINTER and is not freed in this routine */
5528   PetscCall(PetscSFSetFromOptions(sf));
5529   PetscCall(PetscSFSetUp(sf));
5530 
5531   PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets)); /* per local row: [2*i] diagonal offset, [2*i+1] off-diagonal offset */
5532   PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));         /* per local row: [2*i] diagonal count,  [2*i+1] off-diagonal count */
5533   PetscCall(PetscCalloc1(nrows, &pnnz));
5534   roffsets[0] = 0;
5535   roffsets[1] = 0;
5536   for (i = 0; i < plocalsize; i++) {
5537     /* diagonal */
5538     nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
5539     /* off-diagonal */
5540     nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
5541     /* compute offsets so that we know the relative location of each row */
5542     roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
5543     roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
5544   }
5545   PetscCall(PetscCalloc1(2 * nrows, &nlcols));
5546   PetscCall(PetscCalloc1(2 * nrows, &loffsets));
5547   /* 'r' means root, and 'l' means leaf; each (diagonal, off-diagonal) pair travels as one MPIU_2INT unit */
5548   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5549   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5550   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5551   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5552   PetscCall(PetscSFDestroy(&sf));
5553   PetscCall(PetscFree(roffsets));
5554   PetscCall(PetscFree(nrcols));
5555   dntotalcols = 0;
5556   ontotalcols = 0;
5557   ncol        = 0;
5558   for (i = 0; i < nrows; i++) {
5559     pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
5560     ncol    = PetscMax(pnnz[i], ncol);
5561     /* diagonal */
5562     dntotalcols += nlcols[i * 2 + 0];
5563     /* off-diagonal */
5564     ontotalcols += nlcols[i * 2 + 1];
5565   }
5566   /* We do not need to figure the right number of columns (ncol is just the longest row length)
5567    * since all the calculations will be done by going through the raw data
5568    * */
5569   PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
5570   PetscCall(MatSetUp(*P_oth));
5571   PetscCall(PetscFree(pnnz));
5572   p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5573   /* diagonal */
5574   PetscCall(PetscCalloc1(dntotalcols, &iremote));
5575   /* off-diagonal */
5576   PetscCall(PetscCalloc1(ontotalcols, &oiremote));
5577   /* diagonal */
5578   PetscCall(PetscCalloc1(dntotalcols, &ilocal));
5579   /* off-diagonal */
5580   PetscCall(PetscCalloc1(ontotalcols, &oilocal));
5581   dntotalcols = 0;
5582   ontotalcols = 0;
5583   ntotalcols  = 0;
5584   for (i = 0; i < nrows; i++) {
5585     owner = 0;
5586     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
5587     /* Set iremote for diag matrix */
5588     for (j = 0; j < nlcols[i * 2 + 0]; j++) {
5589       iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
5590       iremote[dntotalcols].rank  = owner;
5591       /* P_oth is SeqAIJ: ilocal is a running position (ntotalcols) into its j/a arrays, diagonal entries of a row first */
5592       ilocal[dntotalcols++] = ntotalcols++;
5593     }
5594     /* off-diagonal */
5595     for (j = 0; j < nlcols[i * 2 + 1]; j++) {
5596       oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
5597       oiremote[ontotalcols].rank  = owner;
5598       oilocal[ontotalcols++]      = ntotalcols++;
5599     }
5600   }
5601   PetscCall(ISRestoreIndices(rows, &lrowindices));
5602   PetscCall(PetscFree(loffsets));
5603   PetscCall(PetscFree(nlcols));
5604   PetscCall(PetscSFCreate(comm, &sf));
5605   /* P serves as roots and P_oth is leaves
5606    * Diag matrix
5607    * */
5608   PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5609   PetscCall(PetscSFSetFromOptions(sf));
5610   PetscCall(PetscSFSetUp(sf));
5611 
5612   PetscCall(PetscSFCreate(comm, &osf));
5613   /* off-diagonal */
5614   PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
5615   PetscCall(PetscSFSetFromOptions(osf));
5616   PetscCall(PetscSFSetUp(osf));
5617   PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5618   PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5619   /* operate on the matrix internal data to save memory */
5620   PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5621   PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5622   PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
5623   /* Convert to global indices for diag matrix: pd->j is shifted in place and shifted back once the broadcast below has ended */
5624   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
5625   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5626   /* We want P_oth to store global indices; po->j is likewise mapped to global indices in place */
5627   PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
5628   /* Use memory scalable approach */
5629   PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
5630   PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
5631   PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5632   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5633   /* Convert back to local indices */
5634   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
5635   PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5636   nout = 0; /* the off-diagonal broadcast has ended: map po->j back to local indices; every entry must map, which is checked below */
5637   PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
5638   PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
5639   PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
5640   /* Exchange values */
5641   PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5642   PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5643   PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5644   PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5645   /* Stop PETSc from shrinking memory: set the length of every row (ilen) to its preallocated maximum (imax) */
5646   for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
5647   PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
5648   PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
5649   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5650   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
5651   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
5652   PetscCall(PetscSFDestroy(&sf));
5653   PetscCall(PetscSFDestroy(&osf));
5654   PetscFunctionReturn(PETSC_SUCCESS);
5655 }
5656 
5657 /*
5658  * Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5659  * This supports MPIAIJ and MAIJ
5660  * */
5661 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
5662 {
5663   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
5664   Mat_SeqAIJ *p_oth;
5665   IS          rows, map;
5666   PetscHMapI  hamp;
5667   PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
5668   MPI_Comm    comm;
5669   PetscSF     sf, osf;
5670   PetscBool   has;
5671 
5672   PetscFunctionBegin;
5673   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5674   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
5675   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5676    *  and then create a submatrix (that often is an overlapping matrix)
5677    * */
5678   if (reuse == MAT_INITIAL_MATRIX) {
5679     /* Use a hash table to figure out unique keys */
5680     PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
5681     PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
5682     count = 0;
5683     /* Assume that  a->g is sorted, otherwise the following does not make sense */
5684     for (i = 0; i < a->B->cmap->n; i++) {
5685       key = a->garray[i] / dof;
5686       PetscCall(PetscHMapIHas(hamp, key, &has));
5687       if (!has) {
5688         mapping[i] = count;
5689         PetscCall(PetscHMapISet(hamp, key, count++));
5690       } else {
5691         /* Current 'i' has the same value the previous step */
5692         mapping[i] = count - 1;
5693       }
5694     }
5695     PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
5696     PetscCall(PetscHMapIGetSize(hamp, &htsize));
5697     PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
5698     PetscCall(PetscCalloc1(htsize, &rowindices));
5699     off = 0;
5700     PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
5701     PetscCall(PetscHMapIDestroy(&hamp));
5702     PetscCall(PetscSortInt(htsize, rowindices));
5703     PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
5704     /* In case, the matrix was already created but users want to recreate the matrix */
5705     PetscCall(MatDestroy(P_oth));
5706     PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
5707     PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
5708     PetscCall(ISDestroy(&map));
5709     PetscCall(ISDestroy(&rows));
5710   } else if (reuse == MAT_REUSE_MATRIX) {
5711     /* If matrix was already created, we simply update values using SF objects
5712      * that as attached to the matrix earlier.
5713      */
5714     const PetscScalar *pd_a, *po_a;
5715 
5716     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
5717     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
5718     PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
5719     p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5720     /* Update values in place */
5721     PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5722     PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5723     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5724     PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5725     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5726     PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5727     PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5728     PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5729   } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
5730   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
5731   PetscFunctionReturn(PETSC_SUCCESS);
5732 }
5733 
5734 /*@C
5735   MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A`
5736 
5737   Collective
5738 
5739   Input Parameters:
5740 + A     - the first matrix in `MATMPIAIJ` format
5741 . B     - the second matrix in `MATMPIAIJ` format
5742 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5743 
5744   Output Parameters:
5745 + rowb  - On input index sets of rows of B to extract (or `NULL`), modified on output
5746 . colb  - On input index sets of columns of B to extract (or `NULL`), modified on output
5747 - B_seq - the sequential matrix generated
5748 
5749   Level: developer
5750 
5751 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
5752 @*/
5753 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5754 {
5755   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5756   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5757   IS          isrowb, iscolb;
5758   Mat        *bseq = NULL;
5759 
5760   PetscFunctionBegin;
5761   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5762              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5763   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5764 
5765   if (scall == MAT_INITIAL_MATRIX) {
5766     start = A->cmap->rstart;
5767     cmap  = a->garray;
5768     nzA   = a->A->cmap->n;
5769     nzB   = a->B->cmap->n;
5770     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5771     ncols = 0;
5772     for (i = 0; i < nzB; i++) { /* row < local row index */
5773       if (cmap[i] < start) idx[ncols++] = cmap[i];
5774       else break;
5775     }
5776     imark = i;
5777     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5778     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5779     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5780     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5781   } else {
5782     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5783     isrowb = *rowb;
5784     iscolb = *colb;
5785     PetscCall(PetscMalloc1(1, &bseq));
5786     bseq[0] = *B_seq;
5787   }
5788   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5789   *B_seq = bseq[0];
5790   PetscCall(PetscFree(bseq));
5791   if (!rowb) {
5792     PetscCall(ISDestroy(&isrowb));
5793   } else {
5794     *rowb = isrowb;
5795   }
5796   if (!colb) {
5797     PetscCall(ISDestroy(&iscolb));
5798   } else {
5799     *colb = iscolb;
5800   }
5801   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5802   PetscFunctionReturn(PETSC_SUCCESS);
5803 }
5804 
5805 /*
5806     MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns
5807     of the OFF-DIAGONAL portion of local A
5808 
5809     Collective
5810 
5811    Input Parameters:
5812 +    A,B - the matrices in `MATMPIAIJ` format
5813 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5814 
5815    Output Parameter:
5816 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5817 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5818 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5819 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5820 
5821     Developer Note:
5822     This directly accesses information inside the VecScatter associated with the matrix-vector product
5823      for this matrix. This is not desirable..
5824 
5825     Level: developer
5826 
5827 */
5828 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5829 {
5830   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5831   Mat_SeqAIJ        *b_oth;
5832   VecScatter         ctx;
5833   MPI_Comm           comm;
5834   const PetscMPIInt *rprocs, *sprocs;
5835   const PetscInt    *srow, *rstarts, *sstarts;
5836   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5837   PetscInt           i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len;
5838   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5839   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5840   PetscMPIInt        size, tag, rank, nreqs;
5841 
5842   PetscFunctionBegin;
5843   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5844   PetscCallMPI(MPI_Comm_size(comm, &size));
5845 
5846   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5847              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5848   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5849   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5850 
5851   if (size == 1) {
5852     startsj_s = NULL;
5853     bufa_ptr  = NULL;
5854     *B_oth    = NULL;
5855     PetscFunctionReturn(PETSC_SUCCESS);
5856   }
5857 
5858   ctx = a->Mvctx;
5859   tag = ((PetscObject)ctx)->tag;
5860 
5861   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5862   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5863   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5864   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5865   PetscCall(PetscMalloc1(nreqs, &reqs));
5866   rwaits = reqs;
5867   swaits = PetscSafePointerPlusOffset(reqs, nrecvs);
5868 
5869   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5870   if (scall == MAT_INITIAL_MATRIX) {
5871     /* i-array */
5872     /*  post receives */
5873     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5874     for (i = 0; i < nrecvs; i++) {
5875       rowlen = rvalues + rstarts[i] * rbs;
5876       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5877       PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5878     }
5879 
5880     /* pack the outgoing message */
5881     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5882 
5883     sstartsj[0] = 0;
5884     rstartsj[0] = 0;
5885     len         = 0; /* total length of j or a array to be sent */
5886     if (nsends) {
5887       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5888       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5889     }
5890     for (i = 0; i < nsends; i++) {
5891       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5892       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5893       for (j = 0; j < nrows; j++) {
5894         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5895         for (l = 0; l < sbs; l++) {
5896           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5897 
5898           rowlen[j * sbs + l] = ncols;
5899 
5900           len += ncols;
5901           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5902         }
5903         k++;
5904       }
5905       PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5906 
5907       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5908     }
5909     /* recvs and sends of i-array are completed */
5910     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5911     PetscCall(PetscFree(svalues));
5912 
5913     /* allocate buffers for sending j and a arrays */
5914     PetscCall(PetscMalloc1(len + 1, &bufj));
5915     PetscCall(PetscMalloc1(len + 1, &bufa));
5916 
5917     /* create i-array of B_oth */
5918     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5919 
5920     b_othi[0] = 0;
5921     len       = 0; /* total length of j or a array to be received */
5922     k         = 0;
5923     for (i = 0; i < nrecvs; i++) {
5924       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5925       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5926       for (j = 0; j < nrows; j++) {
5927         b_othi[k + 1] = b_othi[k] + rowlen[j];
5928         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5929         k++;
5930       }
5931       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5932     }
5933     PetscCall(PetscFree(rvalues));
5934 
5935     /* allocate space for j and a arrays of B_oth */
5936     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5937     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5938 
5939     /* j-array */
5940     /*  post receives of j-array */
5941     for (i = 0; i < nrecvs; i++) {
5942       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5943       PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5944     }
5945 
5946     /* pack the outgoing message j-array */
5947     if (nsends) k = sstarts[0];
5948     for (i = 0; i < nsends; i++) {
5949       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5950       bufJ  = bufj + sstartsj[i];
5951       for (j = 0; j < nrows; j++) {
5952         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5953         for (ll = 0; ll < sbs; ll++) {
5954           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5955           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5956           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5957         }
5958       }
5959       PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5960     }
5961 
5962     /* recvs and sends of j-array are completed */
5963     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5964   } else if (scall == MAT_REUSE_MATRIX) {
5965     sstartsj = *startsj_s;
5966     rstartsj = *startsj_r;
5967     bufa     = *bufa_ptr;
5968     b_oth    = (Mat_SeqAIJ *)(*B_oth)->data;
5969     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5970   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5971 
5972   /* a-array */
5973   /*  post receives of a-array */
5974   for (i = 0; i < nrecvs; i++) {
5975     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5976     PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
5977   }
5978 
5979   /* pack the outgoing message a-array */
5980   if (nsends) k = sstarts[0];
5981   for (i = 0; i < nsends; i++) {
5982     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5983     bufA  = bufa + sstartsj[i];
5984     for (j = 0; j < nrows; j++) {
5985       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5986       for (ll = 0; ll < sbs; ll++) {
5987         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5988         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
5989         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5990       }
5991     }
5992     PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
5993   }
5994   /* recvs and sends of a-array are completed */
5995   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5996   PetscCall(PetscFree(reqs));
5997 
5998   if (scall == MAT_INITIAL_MATRIX) {
5999     /* put together the new matrix */
6000     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
6001 
6002     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
6003     /* Since these are PETSc arrays, change flags to free them as necessary. */
6004     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
6005     b_oth->free_a  = PETSC_TRUE;
6006     b_oth->free_ij = PETSC_TRUE;
6007     b_oth->nonew   = 0;
6008 
6009     PetscCall(PetscFree(bufj));
6010     if (!startsj_s || !bufa_ptr) {
6011       PetscCall(PetscFree2(sstartsj, rstartsj));
6012       PetscCall(PetscFree(bufa_ptr));
6013     } else {
6014       *startsj_s = sstartsj;
6015       *startsj_r = rstartsj;
6016       *bufa_ptr  = bufa;
6017     }
6018   } else if (scall == MAT_REUSE_MATRIX) {
6019     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
6020   }
6021 
6022   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
6023   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
6024   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
6025   PetscFunctionReturn(PETSC_SUCCESS);
6026 }
6027 
/*
  Prototypes of the MatConvert_* routines (plus MatProductSetFromOptions_IS_XAIJ) referenced further down
  in this file; presumably they are implemented in other source files -- TODO confirm.
  A prototype placed inside an #if block is declared only when the corresponding PETSC_HAVE_* macro is defined.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
#if defined(PETSC_HAVE_MKL_SPARSE)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_CUDA)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_HIP)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6057 
6058 /*
6059     Computes (B'*A')' since computing B*A directly is untenable
6060 
6061                n                       p                          p
6062         [             ]       [             ]         [                 ]
6063       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6064         [             ]       [             ]         [                 ]
6065 
6066 */
6067 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6068 {
6069   Mat At, Bt, Ct;
6070 
6071   PetscFunctionBegin;
6072   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6073   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6074   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
6075   PetscCall(MatDestroy(&At));
6076   PetscCall(MatDestroy(&Bt));
6077   PetscCall(MatTransposeSetPrecursor(Ct, C));
6078   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6079   PetscCall(MatDestroy(&Ct));
6080   PetscFunctionReturn(PETSC_SUCCESS);
6081 }
6082 
6083 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6084 {
6085   PetscBool cisdense;
6086 
6087   PetscFunctionBegin;
6088   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6089   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6090   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6091   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6092   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6093   PetscCall(MatSetUp(C));
6094 
6095   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6096   PetscFunctionReturn(PETSC_SUCCESS);
6097 }
6098 
6099 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6100 {
6101   Mat_Product *product = C->product;
6102   Mat          A = product->A, B = product->B;
6103 
6104   PetscFunctionBegin;
6105   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
6106              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6107   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6108   C->ops->productsymbolic = MatProductSymbolic_AB;
6109   PetscFunctionReturn(PETSC_SUCCESS);
6110 }
6111 
6112 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6113 {
6114   Mat_Product *product = C->product;
6115 
6116   PetscFunctionBegin;
6117   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6118   PetscFunctionReturn(PETSC_SUCCESS);
6119 }
6120 
6121 /*
6122    Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6123 
6124   Input Parameters:
6125 
6126     j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
6127     j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)
6128 
6129     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6130 
6131     For Set1, j1[] contains column indices of the nonzeros.
6132     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
6133     respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted,
6134     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6135 
6136     Similar for Set2.
6137 
6138     This routine merges the two sets of nonzeros row by row and removes repeats.
6139 
6140   Output Parameters: (memory is allocated by the caller)
6141 
6142     i[],j[]: the CSR of the merged matrix, which has m rows.
6143     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6144     imap2[]: similar to imap1[], but for Set2.
6145     Note we order nonzeros row-by-row and from left to right.
6146 */
6147 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
6148 {
6149   PetscInt   r, m; /* Row index of mat */
6150   PetscCount t, t1, t2, b1, e1, b2, e2;
6151 
6152   PetscFunctionBegin;
6153   PetscCall(MatGetLocalSize(mat, &m, NULL));
6154   t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */
6155   i[0]        = 0;
6156   for (r = 0; r < m; r++) { /* Do row by row merging */
6157     b1 = rowBegin1[r];
6158     e1 = rowEnd1[r];
6159     b2 = rowBegin2[r];
6160     e2 = rowEnd2[r];
6161     while (b1 < e1 && b2 < e2) {
6162       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6163         j[t]      = j1[b1];
6164         imap1[t1] = t;
6165         imap2[t2] = t;
6166         b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
6167         b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
6168         t1++;
6169         t2++;
6170         t++;
6171       } else if (j1[b1] < j2[b2]) {
6172         j[t]      = j1[b1];
6173         imap1[t1] = t;
6174         b1 += jmap1[t1 + 1] - jmap1[t1];
6175         t1++;
6176         t++;
6177       } else {
6178         j[t]      = j2[b2];
6179         imap2[t2] = t;
6180         b2 += jmap2[t2 + 1] - jmap2[t2];
6181         t2++;
6182         t++;
6183       }
6184     }
6185     /* Merge the remaining in either j1[] or j2[] */
6186     while (b1 < e1) {
6187       j[t]      = j1[b1];
6188       imap1[t1] = t;
6189       b1 += jmap1[t1 + 1] - jmap1[t1];
6190       t1++;
6191       t++;
6192     }
6193     while (b2 < e2) {
6194       j[t]      = j2[b2];
6195       imap2[t2] = t;
6196       b2 += jmap2[t2 + 1] - jmap2[t2];
6197       t2++;
6198       t++;
6199     }
6200     i[r + 1] = t;
6201   }
6202   PetscFunctionReturn(PETSC_SUCCESS);
6203 }
6204 
6205 /*
6206   Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6207 
6208   Input Parameters:
6209     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6210     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6211       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6212 
6213       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6214       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6215 
6216   Output Parameters:
6217     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6218     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6219       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6220       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6221 
6222     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6223       Atot: number of entries belonging to the diagonal block.
6224       Annz: number of unique nonzeros belonging to the diagonal block.
6225       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6226         repeats (i.e., same 'i,j' pair).
6227       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6228         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6229 
6230       Atot: number of entries belonging to the diagonal block
6231       Annz: number of unique nonzeros belonging to the diagonal block.
6232 
6233     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6234 
6235     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6236 */
6237 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
6238 {
6239   PetscInt    cstart, cend, rstart, rend, row, col;
6240   PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6241   PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6242   PetscCount  k, m, p, q, r, s, mid;
6243   PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;
6244 
6245   PetscFunctionBegin;
6246   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6247   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6248   m = rend - rstart;
6249 
6250   /* Skip negative rows */
6251   for (k = 0; k < n; k++)
6252     if (i[k] >= 0) break;
6253 
6254   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6255      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6256   */
6257   while (k < n) {
6258     row = i[k];
6259     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6260     for (s = k; s < n; s++)
6261       if (i[s] != row) break;
6262 
6263     /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */
6264     for (p = k; p < s; p++) {
6265       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT;
6266       else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
6267     }
6268     PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
6269     PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6270     rowBegin[row - rstart] = k;
6271     rowMid[row - rstart]   = mid;
6272     rowEnd[row - rstart]   = s;
6273 
6274     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6275     Atot += mid - k;
6276     Btot += s - mid;
6277 
6278     /* Count unique nonzeros of this diag row */
6279     for (p = k; p < mid;) {
6280       col = j[p];
6281       do {
6282         j[p] += PETSC_MAX_INT; /* Revert the modified diagonal indices */
6283         p++;
6284       } while (p < mid && j[p] == col);
6285       Annz++;
6286     }
6287 
6288     /* Count unique nonzeros of this offdiag row */
6289     for (p = mid; p < s;) {
6290       col = j[p];
6291       do {
6292         p++;
6293       } while (p < s && j[p] == col);
6294       Bnnz++;
6295     }
6296     k = s;
6297   }
6298 
6299   /* Allocation according to Atot, Btot, Annz, Bnnz */
6300   PetscCall(PetscMalloc1(Atot, &Aperm));
6301   PetscCall(PetscMalloc1(Btot, &Bperm));
6302   PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
6303   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));
6304 
6305   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6306   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6307   for (r = 0; r < m; r++) {
6308     k   = rowBegin[r];
6309     mid = rowMid[r];
6310     s   = rowEnd[r];
6311     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
6312     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
6313     Atot += mid - k;
6314     Btot += s - mid;
6315 
6316     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6317     for (p = k; p < mid;) {
6318       col = j[p];
6319       q   = p;
6320       do {
6321         p++;
6322       } while (p < mid && j[p] == col);
6323       Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
6324       Annz++;
6325     }
6326 
6327     for (p = mid; p < s;) {
6328       col = j[p];
6329       q   = p;
6330       do {
6331         p++;
6332       } while (p < s && j[p] == col);
6333       Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
6334       Bnnz++;
6335     }
6336   }
6337   /* Output */
6338   *Aperm_ = Aperm;
6339   *Annz_  = Annz;
6340   *Atot_  = Atot;
6341   *Ajmap_ = Ajmap;
6342   *Bperm_ = Bperm;
6343   *Bnnz_  = Bnnz;
6344   *Btot_  = Btot;
6345   *Bjmap_ = Bjmap;
6346   PetscFunctionReturn(PETSC_SUCCESS);
6347 }
6348 
6349 /*
6350   Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6351 
6352   Input Parameters:
6353     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6354     nnz:  number of unique nonzeros in the merged matrix
6355     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6356     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6357 
6358   Output Parameter: (memory is allocated by the caller)
6359     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6360 
6361   Example:
6362     nnz1 = 4
6363     nnz  = 6
6364     imap = [1,3,4,5]
6365     jmap = [0,3,5,6,7]
6366    then,
6367     jmap_new = [0,0,3,3,5,6,7]
6368 */
6369 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6370 {
6371   PetscCount k, p;
6372 
6373   PetscFunctionBegin;
6374   jmap_new[0] = 0;
6375   p           = nnz;                /* p loops over jmap_new[] backwards */
6376   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6377     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6378   }
6379   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6380   PetscFunctionReturn(PETSC_SUCCESS);
6381 }
6382 
6383 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data)
6384 {
6385   MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data;
6386 
6387   PetscFunctionBegin;
6388   PetscCall(PetscSFDestroy(&coo->sf));
6389   PetscCall(PetscFree(coo->Aperm1));
6390   PetscCall(PetscFree(coo->Bperm1));
6391   PetscCall(PetscFree(coo->Ajmap1));
6392   PetscCall(PetscFree(coo->Bjmap1));
6393   PetscCall(PetscFree(coo->Aimap2));
6394   PetscCall(PetscFree(coo->Bimap2));
6395   PetscCall(PetscFree(coo->Aperm2));
6396   PetscCall(PetscFree(coo->Bperm2));
6397   PetscCall(PetscFree(coo->Ajmap2));
6398   PetscCall(PetscFree(coo->Bjmap2));
6399   PetscCall(PetscFree(coo->Cperm1));
6400   PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
6401   PetscCall(PetscFree(coo));
6402   PetscFunctionReturn(PETSC_SUCCESS);
6403 }
6404 
6405 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6406 {
6407   MPI_Comm             comm;
6408   PetscMPIInt          rank, size;
6409   PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6410   PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
6411   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6412   PetscContainer       container;
6413   MatCOOStruct_MPIAIJ *coo;
6414 
6415   PetscFunctionBegin;
6416   PetscCall(PetscFree(mpiaij->garray));
6417   PetscCall(VecDestroy(&mpiaij->lvec));
6418 #if defined(PETSC_USE_CTABLE)
6419   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6420 #else
6421   PetscCall(PetscFree(mpiaij->colmap));
6422 #endif
6423   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6424   mat->assembled     = PETSC_FALSE;
6425   mat->was_assembled = PETSC_FALSE;
6426 
6427   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6428   PetscCallMPI(MPI_Comm_size(comm, &size));
6429   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6430   PetscCall(PetscLayoutSetUp(mat->rmap));
6431   PetscCall(PetscLayoutSetUp(mat->cmap));
6432   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6433   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6434   PetscCall(MatGetLocalSize(mat, &m, &n));
6435   PetscCall(MatGetSize(mat, &M, &N));
6436 
6437   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6438   /* entries come first, then local rows, then remote rows.                     */
6439   PetscCount n1 = coo_n, *perm1;
6440   PetscInt  *i1 = coo_i, *j1 = coo_j;
6441 
6442   PetscCall(PetscMalloc1(n1, &perm1));
6443   for (k = 0; k < n1; k++) perm1[k] = k;
6444 
6445   /* Manipulate indices so that entries with negative row or col indices will have smallest
6446      row indices, local entries will have greater but negative row indices, and remote entries
6447      will have positive row indices.
6448   */
6449   for (k = 0; k < n1; k++) {
6450     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT;                /* e.g., -2^31, minimal to move them ahead */
6451     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6452     else {
6453       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6454       if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6455     }
6456   }
6457 
6458   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6459   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6460 
6461   /* Advance k to the first entry we need to take care of */
6462   for (k = 0; k < n1; k++)
6463     if (i1[k] > PETSC_MIN_INT) break;
6464   PetscInt i1start = k;
6465 
6466   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */
6467   for (; k < rem; k++) i1[k] += PETSC_MAX_INT;                                    /* Revert row indices of local rows*/
6468 
6469   /*           Send remote rows to their owner                                  */
6470   /* Find which rows should be sent to which remote ranks*/
6471   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6472   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6473   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6474   const PetscInt *ranges;
6475   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6476 
6477   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6478   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6479   for (k = rem; k < n1;) {
6480     PetscMPIInt owner;
6481     PetscInt    firstRow, lastRow;
6482 
6483     /* Locate a row range */
6484     firstRow = i1[k]; /* first row of this owner */
6485     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6486     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6487 
6488     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6489     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6490 
6491     /* All entries in [k,p) belong to this remote owner */
6492     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6493       PetscMPIInt *sendto2;
6494       PetscInt    *nentries2;
6495       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6496 
6497       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6498       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6499       PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1));
6500       PetscCall(PetscFree2(sendto, nentries2));
6501       sendto   = sendto2;
6502       nentries = nentries2;
6503       maxNsend = maxNsend2;
6504     }
6505     sendto[nsend]   = owner;
6506     nentries[nsend] = p - k;
6507     PetscCall(PetscCountCast(p - k, &nentries[nsend]));
6508     nsend++;
6509     k = p;
6510   }
6511 
6512   /* Build 1st SF to know offsets on remote to send data */
6513   PetscSF      sf1;
6514   PetscInt     nroots = 1, nroots2 = 0;
6515   PetscInt     nleaves = nsend, nleaves2 = 0;
6516   PetscInt    *offsets;
6517   PetscSFNode *iremote;
6518 
6519   PetscCall(PetscSFCreate(comm, &sf1));
6520   PetscCall(PetscMalloc1(nsend, &iremote));
6521   PetscCall(PetscMalloc1(nsend, &offsets));
6522   for (k = 0; k < nsend; k++) {
6523     iremote[k].rank  = sendto[k];
6524     iremote[k].index = 0;
6525     nleaves2 += nentries[k];
6526     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6527   }
6528   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6529   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6530   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6531   PetscCall(PetscSFDestroy(&sf1));
6532   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem);
6533 
6534   /* Build 2nd SF to send remote COOs to their owner */
6535   PetscSF sf2;
6536   nroots  = nroots2;
6537   nleaves = nleaves2;
6538   PetscCall(PetscSFCreate(comm, &sf2));
6539   PetscCall(PetscSFSetFromOptions(sf2));
6540   PetscCall(PetscMalloc1(nleaves, &iremote));
6541   p = 0;
6542   for (k = 0; k < nsend; k++) {
6543     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6544     for (q = 0; q < nentries[k]; q++, p++) {
6545       iremote[p].rank  = sendto[k];
6546       iremote[p].index = offsets[k] + q;
6547     }
6548   }
6549   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6550 
6551   /* Send the remote COOs to their owner */
6552   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6553   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6554   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6555   PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6556   PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6557   PetscInt *i1prem = i1 ? i1 + rem : NULL; /* silence ubsan warnings about pointer arithmetic on null pointer */
6558   PetscInt *j1prem = j1 ? j1 + rem : NULL;
6559   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6560   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE));
6561   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6562   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE));
6563 
6564   PetscCall(PetscFree(offsets));
6565   PetscCall(PetscFree2(sendto, nentries));
6566 
6567   /* Sort received COOs by row along with the permutation array     */
6568   for (k = 0; k < n2; k++) perm2[k] = k;
6569   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6570 
6571   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6572   PetscCount *Cperm1;
6573   PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6574   PetscCount *perm1prem = perm1 ? perm1 + rem : NULL;
6575   PetscCall(PetscMalloc1(nleaves, &Cperm1));
6576   PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves));
6577 
6578   /* Support for HYPRE matrices, kind of a hack.
6579      Swap min column with diagonal so that diagonal values will go first */
6580   PetscBool   hypre;
6581   const char *name;
6582   PetscCall(PetscObjectGetName((PetscObject)mat, &name));
6583   PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", name, &hypre));
6584   if (hypre) {
6585     PetscInt *minj;
6586     PetscBT   hasdiag;
6587 
6588     PetscCall(PetscBTCreate(m, &hasdiag));
6589     PetscCall(PetscMalloc1(m, &minj));
6590     for (k = 0; k < m; k++) minj[k] = PETSC_MAX_INT;
6591     for (k = i1start; k < rem; k++) {
6592       if (j1[k] < cstart || j1[k] >= cend) continue;
6593       const PetscInt rindex = i1[k] - rstart;
6594       if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6595       minj[rindex] = PetscMin(minj[rindex], j1[k]);
6596     }
6597     for (k = 0; k < n2; k++) {
6598       if (j2[k] < cstart || j2[k] >= cend) continue;
6599       const PetscInt rindex = i2[k] - rstart;
6600       if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6601       minj[rindex] = PetscMin(minj[rindex], j2[k]);
6602     }
6603     for (k = i1start; k < rem; k++) {
6604       const PetscInt rindex = i1[k] - rstart;
6605       if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6606       if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart);
6607       else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex];
6608     }
6609     for (k = 0; k < n2; k++) {
6610       const PetscInt rindex = i2[k] - rstart;
6611       if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6612       if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart);
6613       else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex];
6614     }
6615     PetscCall(PetscBTDestroy(&hasdiag));
6616     PetscCall(PetscFree(minj));
6617   }
6618 
6619   /* Split local COOs and received COOs into diag/offdiag portions */
6620   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6621   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1;
6622   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6623   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6624   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6625   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6626 
6627   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6628   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6629   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6630   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6631 
6632   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6633   PetscInt *Ai, *Bi;
6634   PetscInt *Aj, *Bj;
6635 
6636   PetscCall(PetscMalloc1(m + 1, &Ai));
6637   PetscCall(PetscMalloc1(m + 1, &Bi));
6638   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6639   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6640 
6641   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6642   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6643   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6644   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6645   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6646 
6647   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6648   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6649 
6650   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6651   /* expect nonzeros in A/B most likely have local contributing entries        */
6652   PetscInt    Annz = Ai[m];
6653   PetscInt    Bnnz = Bi[m];
6654   PetscCount *Ajmap1_new, *Bjmap1_new;
6655 
6656   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6657   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6658 
6659   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6660   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6661 
6662   PetscCall(PetscFree(Aimap1));
6663   PetscCall(PetscFree(Ajmap1));
6664   PetscCall(PetscFree(Bimap1));
6665   PetscCall(PetscFree(Bjmap1));
6666   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6667   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6668   PetscCall(PetscFree(perm1));
6669   PetscCall(PetscFree3(i2, j2, perm2));
6670 
6671   Ajmap1 = Ajmap1_new;
6672   Bjmap1 = Bjmap1_new;
6673 
6674   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6675   if (Annz < Annz1 + Annz2) {
6676     PetscInt *Aj_new;
6677     PetscCall(PetscMalloc1(Annz, &Aj_new));
6678     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6679     PetscCall(PetscFree(Aj));
6680     Aj = Aj_new;
6681   }
6682 
6683   if (Bnnz < Bnnz1 + Bnnz2) {
6684     PetscInt *Bj_new;
6685     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6686     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6687     PetscCall(PetscFree(Bj));
6688     Bj = Bj_new;
6689   }
6690 
6691   /* Create new submatrices for on-process and off-process coupling                  */
6692   PetscScalar     *Aa, *Ba;
6693   MatType          rtype;
6694   Mat_SeqAIJ      *a, *b;
6695   PetscObjectState state;
6696   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6697   PetscCall(PetscCalloc1(Bnnz, &Ba));
6698   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6699   if (cstart) {
6700     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6701   }
6702 
6703   PetscCall(MatGetRootType_Private(mat, &rtype));
6704 
6705   MatSeqXAIJGetOptions_Private(mpiaij->A);
6706   PetscCall(MatDestroy(&mpiaij->A));
6707   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6708   PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat));
6709   MatSeqXAIJRestoreOptions_Private(mpiaij->A);
6710 
6711   MatSeqXAIJGetOptions_Private(mpiaij->B);
6712   PetscCall(MatDestroy(&mpiaij->B));
6713   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6714   PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat));
6715   MatSeqXAIJRestoreOptions_Private(mpiaij->B);
6716 
6717   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6718   mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ
6719   state              = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate;
6720   PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
6721 
6722   a               = (Mat_SeqAIJ *)mpiaij->A->data;
6723   b               = (Mat_SeqAIJ *)mpiaij->B->data;
6724   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6725   a->free_a = b->free_a = PETSC_TRUE;
6726   a->free_ij = b->free_ij = PETSC_TRUE;
6727 
6728   /* conversion must happen AFTER multiply setup */
6729   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6730   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6731   PetscCall(VecDestroy(&mpiaij->lvec));
6732   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6733 
6734   // Put the COO struct in a container and then attach that to the matrix
6735   PetscCall(PetscMalloc1(1, &coo));
6736   coo->n       = coo_n;
6737   coo->sf      = sf2;
6738   coo->sendlen = nleaves;
6739   coo->recvlen = nroots;
6740   coo->Annz    = Annz;
6741   coo->Bnnz    = Bnnz;
6742   coo->Annz2   = Annz2;
6743   coo->Bnnz2   = Bnnz2;
6744   coo->Atot1   = Atot1;
6745   coo->Atot2   = Atot2;
6746   coo->Btot1   = Btot1;
6747   coo->Btot2   = Btot2;
6748   coo->Ajmap1  = Ajmap1;
6749   coo->Aperm1  = Aperm1;
6750   coo->Bjmap1  = Bjmap1;
6751   coo->Bperm1  = Bperm1;
6752   coo->Aimap2  = Aimap2;
6753   coo->Ajmap2  = Ajmap2;
6754   coo->Aperm2  = Aperm2;
6755   coo->Bimap2  = Bimap2;
6756   coo->Bjmap2  = Bjmap2;
6757   coo->Bperm2  = Bperm2;
6758   coo->Cperm1  = Cperm1;
6759   // Allocate in preallocation. If not used, it has zero cost on host
6760   PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
6761   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
6762   PetscCall(PetscContainerSetPointer(container, coo));
6763   PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ));
6764   PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
6765   PetscCall(PetscContainerDestroy(&container));
6766   PetscFunctionReturn(PETSC_SUCCESS);
6767 }
6768 
6769 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6770 {
6771   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6772   Mat                  A = mpiaij->A, B = mpiaij->B;
6773   PetscScalar         *Aa, *Ba;
6774   PetscScalar         *sendbuf, *recvbuf;
6775   const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
6776   const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
6777   const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
6778   const PetscCount    *Cperm1;
6779   PetscContainer       container;
6780   MatCOOStruct_MPIAIJ *coo;
6781 
6782   PetscFunctionBegin;
6783   PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
6784   PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix");
6785   PetscCall(PetscContainerGetPointer(container, (void **)&coo));
6786   sendbuf = coo->sendbuf;
6787   recvbuf = coo->recvbuf;
6788   Ajmap1  = coo->Ajmap1;
6789   Ajmap2  = coo->Ajmap2;
6790   Aimap2  = coo->Aimap2;
6791   Bjmap1  = coo->Bjmap1;
6792   Bjmap2  = coo->Bjmap2;
6793   Bimap2  = coo->Bimap2;
6794   Aperm1  = coo->Aperm1;
6795   Aperm2  = coo->Aperm2;
6796   Bperm1  = coo->Bperm1;
6797   Bperm2  = coo->Bperm2;
6798   Cperm1  = coo->Cperm1;
6799 
6800   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6801   PetscCall(MatSeqAIJGetArray(B, &Ba));
6802 
6803   /* Pack entries to be sent to remote */
6804   for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6805 
6806   /* Send remote entries to their owner and overlap the communication with local computation */
6807   PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6808   /* Add local entries to A and B */
6809   for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6810     PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
6811     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6812     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6813   }
6814   for (PetscCount i = 0; i < coo->Bnnz; i++) {
6815     PetscScalar sum = 0.0;
6816     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6817     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6818   }
6819   PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6820 
6821   /* Add received remote entries to A and B */
6822   for (PetscCount i = 0; i < coo->Annz2; i++) {
6823     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6824   }
6825   for (PetscCount i = 0; i < coo->Bnnz2; i++) {
6826     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6827   }
6828   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6829   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6830   PetscFunctionReturn(PETSC_SUCCESS);
6831 }
6832 
6833 /*MC
6834    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6835 
6836    Options Database Keys:
6837 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6838 
6839    Level: beginner
6840 
6841    Notes:
6842    `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values,
6843     in this case the values associated with the rows and columns one passes in are set to zero
6844     in the matrix
6845 
    `MatSetOption`(mat,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
6848 
6849 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6850 M*/
6851 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6852 {
6853   Mat_MPIAIJ *b;
6854   PetscMPIInt size;
6855 
6856   PetscFunctionBegin;
6857   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
6858 
6859   PetscCall(PetscNew(&b));
6860   B->data       = (void *)b;
6861   B->ops[0]     = MatOps_Values;
6862   B->assembled  = PETSC_FALSE;
6863   B->insertmode = NOT_SET_VALUES;
6864   b->size       = size;
6865 
6866   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
6867 
6868   /* build cache for off array entries formed */
6869   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
6870 
6871   b->donotstash  = PETSC_FALSE;
6872   b->colmap      = NULL;
6873   b->garray      = NULL;
6874   b->roworiented = PETSC_TRUE;
6875 
6876   /* stuff used for matrix vector multiply */
6877   b->lvec  = NULL;
6878   b->Mvctx = NULL;
6879 
6880   /* stuff for MatGetRow() */
6881   b->rowindices   = NULL;
6882   b->rowvalues    = NULL;
6883   b->getrowactive = PETSC_FALSE;
6884 
6885   /* flexible pointer used in CUSPARSE classes */
6886   b->spptr = NULL;
6887 
6888   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
6889   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
6890   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
6891   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
6892   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
6893   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
6894   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
6895   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
6896   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
6897   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
6898 #if defined(PETSC_HAVE_CUDA)
6899   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
6900 #endif
6901 #if defined(PETSC_HAVE_HIP)
6902   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
6903 #endif
6904 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6905   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
6906 #endif
6907 #if defined(PETSC_HAVE_MKL_SPARSE)
6908   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
6909 #endif
6910   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
6911   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
6912   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
6913   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
6914 #if defined(PETSC_HAVE_ELEMENTAL)
6915   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
6916 #endif
6917 #if defined(PETSC_HAVE_SCALAPACK)
6918   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
6919 #endif
6920   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
6921   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
6922 #if defined(PETSC_HAVE_HYPRE)
6923   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
6924   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
6925 #endif
6926   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
6927   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
6928   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
6929   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
6930   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
6931   PetscFunctionReturn(PETSC_SUCCESS);
6932 }
6933 
6934 /*@C
6935   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6936   and "off-diagonal" part of the matrix in CSR format.
6937 
6938   Collective
6939 
6940   Input Parameters:
6941 + comm - MPI communicator
6942 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
6943 . n    - This value should be the same as the local size used in creating the
6944          x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have
6945          calculated if `N` is given) For square matrices `n` is almost always `m`.
6946 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
6947 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
6948 . i    - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6949 . j    - column indices, which must be local, i.e., based off the start column of the diagonal portion
6950 . a    - matrix values
6951 . oi   - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6952 . oj   - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
6953 - oa   - matrix values
6954 
6955   Output Parameter:
6956 . mat - the matrix
6957 
6958   Level: advanced
6959 
6960   Notes:
6961   The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user
6962   must free the arrays once the matrix has been destroyed and not before.
6963 
6964   The `i` and `j` indices are 0 based
6965 
6966   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix
6967 
6968   This sets local rows and cannot be used to set off-processor values.
6969 
6970   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6971   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
  not easily support in-place reassembly. It is recommended to use `MatSetValues()` (or a variant thereof) because
6973   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6974   keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6975   communication if it is known that only local entries will be set.
6976 
6977 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6978           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6979 @*/
6980 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
6981 {
6982   Mat_MPIAIJ *maij;
6983 
6984   PetscFunctionBegin;
6985   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
6986   PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
6987   PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
6988   PetscCall(MatCreate(comm, mat));
6989   PetscCall(MatSetSizes(*mat, m, n, M, N));
6990   PetscCall(MatSetType(*mat, MATMPIAIJ));
6991   maij = (Mat_MPIAIJ *)(*mat)->data;
6992 
6993   (*mat)->preallocated = PETSC_TRUE;
6994 
6995   PetscCall(PetscLayoutSetUp((*mat)->rmap));
6996   PetscCall(PetscLayoutSetUp((*mat)->cmap));
6997 
6998   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
6999   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));
7000 
7001   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
7002   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
7003   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
7004   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
7005   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
7006   PetscFunctionReturn(PETSC_SUCCESS);
7007 }
7008 
7009 typedef struct {
7010   Mat       *mp;    /* intermediate products */
7011   PetscBool *mptmp; /* is the intermediate product temporary ? */
7012   PetscInt   cp;    /* number of intermediate products */
7013 
7014   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
7015   PetscInt    *startsj_s, *startsj_r;
7016   PetscScalar *bufa;
7017   Mat          P_oth;
7018 
7019   /* may take advantage of merging product->B */
7020   Mat Bloc; /* B-local by merging diag and off-diag */
7021 
7022   /* cusparse does not have support to split between symbolic and numeric phases.
7023      When api_user is true, we don't need to update the numerical values
7024      of the temporary storage */
7025   PetscBool reusesym;
7026 
7027   /* support for COO values insertion */
7028   PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
7029   PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
7030   PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
7031   PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
7032   PetscSF      sf;            /* used for non-local values insertion and memory malloc */
7033   PetscMemType mtype;
7034 
7035   /* customization */
7036   PetscBool abmerge;
7037   PetscBool P_oth_bind;
7038 } MatMatMPIAIJBACKEND;
7039 
7040 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
7041 {
7042   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
7043   PetscInt             i;
7044 
7045   PetscFunctionBegin;
7046   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
7047   PetscCall(PetscFree(mmdata->bufa));
7048   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
7049   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
7050   PetscCall(MatDestroy(&mmdata->P_oth));
7051   PetscCall(MatDestroy(&mmdata->Bloc));
7052   PetscCall(PetscSFDestroy(&mmdata->sf));
7053   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
7054   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
7055   PetscCall(PetscFree(mmdata->own[0]));
7056   PetscCall(PetscFree(mmdata->own));
7057   PetscCall(PetscFree(mmdata->off[0]));
7058   PetscCall(PetscFree(mmdata->off));
7059   PetscCall(PetscFree(mmdata));
7060   PetscFunctionReturn(PETSC_SUCCESS);
7061 }
7062 
7063 /* Copy selected n entries with indices in idx[] of A to v[].
7064    If idx is NULL, copy the whole data array of A to v[]
7065  */
7066 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
7067 {
7068   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
7069 
7070   PetscFunctionBegin;
7071   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7072   if (f) {
7073     PetscCall((*f)(A, n, idx, v));
7074   } else {
7075     const PetscScalar *vv;
7076 
7077     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7078     if (n && idx) {
7079       PetscScalar    *w  = v;
7080       const PetscInt *oi = idx;
7081       PetscInt        j;
7082 
7083       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7084     } else {
7085       PetscCall(PetscArraycpy(v, vv, n));
7086     }
7087     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7088   }
7089   PetscFunctionReturn(PETSC_SUCCESS);
7090 }
7091 
7092 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
7093 {
7094   MatMatMPIAIJBACKEND *mmdata;
7095   PetscInt             i, n_d, n_o;
7096 
7097   PetscFunctionBegin;
7098   MatCheckProduct(C, 1);
7099   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
7100   mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
7101   if (!mmdata->reusesym) { /* update temporary matrices */
7102     if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7103     if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
7104   }
7105   mmdata->reusesym = PETSC_FALSE;
7106 
7107   for (i = 0; i < mmdata->cp; i++) {
7108     PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
7109     PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
7110   }
7111   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
7112     PetscInt noff = mmdata->off[i + 1] - mmdata->off[i];
7113 
7114     if (mmdata->mptmp[i]) continue;
7115     if (noff) {
7116       PetscInt nown = mmdata->own[i + 1] - mmdata->own[i];
7117 
7118       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
7119       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
7120       n_o += noff;
7121       n_d += nown;
7122     } else {
7123       Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;
7124 
7125       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
7126       n_d += mm->nz;
7127     }
7128   }
7129   if (mmdata->hasoffproc) { /* offprocess insertion */
7130     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7131     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7132   }
7133   PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
7134   PetscFunctionReturn(PETSC_SUCCESS);
7135 }
7136 
7137 /* Support for Pt * A, A * P, or Pt * A * P */
7138 #define MAX_NUMBER_INTERMEDIATE 4
7139 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
7140 {
7141   Mat_Product           *product = C->product;
7142   Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
7143   Mat_MPIAIJ            *a, *p;
7144   MatMatMPIAIJBACKEND   *mmdata;
7145   ISLocalToGlobalMapping P_oth_l2g = NULL;
7146   IS                     glob      = NULL;
7147   const char            *prefix;
7148   char                   pprefix[256];
7149   const PetscInt        *globidx, *P_oth_idx;
7150   PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
7151   PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
7152   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
7153                                                                                          /* type-0: consecutive, start from 0; type-1: consecutive with */
7154                                                                                          /* a base offset; type-2: sparse with a local to global map table */
7155   const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */
7156 
7157   MatProductType ptype;
7158   PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
7159   PetscMPIInt    size;
7160 
7161   PetscFunctionBegin;
7162   MatCheckProduct(C, 1);
7163   PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
7164   ptype = product->type;
7165   if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
7166     ptype                                          = MATPRODUCT_AB;
7167     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
7168   }
7169   switch (ptype) {
7170   case MATPRODUCT_AB:
7171     A          = product->A;
7172     P          = product->B;
7173     m          = A->rmap->n;
7174     n          = P->cmap->n;
7175     M          = A->rmap->N;
7176     N          = P->cmap->N;
7177     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
7178     break;
7179   case MATPRODUCT_AtB:
7180     P          = product->A;
7181     A          = product->B;
7182     m          = P->cmap->n;
7183     n          = A->cmap->n;
7184     M          = P->cmap->N;
7185     N          = A->cmap->N;
7186     hasoffproc = PETSC_TRUE;
7187     break;
7188   case MATPRODUCT_PtAP:
7189     A          = product->A;
7190     P          = product->B;
7191     m          = P->cmap->n;
7192     n          = P->cmap->n;
7193     M          = P->cmap->N;
7194     N          = P->cmap->N;
7195     hasoffproc = PETSC_TRUE;
7196     break;
7197   default:
7198     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7199   }
7200   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
7201   if (size == 1) hasoffproc = PETSC_FALSE;
7202 
7203   /* defaults */
7204   for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
7205     mp[i]    = NULL;
7206     mptmp[i] = PETSC_FALSE;
7207     rmapt[i] = -1;
7208     cmapt[i] = -1;
7209     rmapa[i] = NULL;
7210     cmapa[i] = NULL;
7211   }
7212 
7213   /* customization */
7214   PetscCall(PetscNew(&mmdata));
7215   mmdata->reusesym = product->api_user;
7216   if (ptype == MATPRODUCT_AB) {
7217     if (product->api_user) {
7218       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
7219       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7220       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7221       PetscOptionsEnd();
7222     } else {
7223       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
7224       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7225       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7226       PetscOptionsEnd();
7227     }
7228   } else if (ptype == MATPRODUCT_PtAP) {
7229     if (product->api_user) {
7230       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
7231       PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7232       PetscOptionsEnd();
7233     } else {
7234       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
7235       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7236       PetscOptionsEnd();
7237     }
7238   }
7239   a = (Mat_MPIAIJ *)A->data;
7240   p = (Mat_MPIAIJ *)P->data;
7241   PetscCall(MatSetSizes(C, m, n, M, N));
7242   PetscCall(PetscLayoutSetUp(C->rmap));
7243   PetscCall(PetscLayoutSetUp(C->cmap));
7244   PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
7245   PetscCall(MatGetOptionsPrefix(C, &prefix));
7246 
7247   cp = 0;
7248   switch (ptype) {
7249   case MATPRODUCT_AB: /* A * P */
7250     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7251 
7252     /* A_diag * P_local (merged or not) */
7253     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7254       /* P is product->B */
7255       PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7256       PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7257       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7258       PetscCall(MatProductSetFill(mp[cp], product->fill));
7259       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7260       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7261       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7262       mp[cp]->product->api_user = product->api_user;
7263       PetscCall(MatProductSetFromOptions(mp[cp]));
7264       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7265       PetscCall(ISGetIndices(glob, &globidx));
7266       rmapt[cp] = 1;
7267       cmapt[cp] = 2;
7268       cmapa[cp] = globidx;
7269       mptmp[cp] = PETSC_FALSE;
7270       cp++;
7271     } else { /* A_diag * P_diag and A_diag * P_off */
7272       PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
7273       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7274       PetscCall(MatProductSetFill(mp[cp], product->fill));
7275       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7276       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7277       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7278       mp[cp]->product->api_user = product->api_user;
7279       PetscCall(MatProductSetFromOptions(mp[cp]));
7280       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7281       rmapt[cp] = 1;
7282       cmapt[cp] = 1;
7283       mptmp[cp] = PETSC_FALSE;
7284       cp++;
7285       PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
7286       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7287       PetscCall(MatProductSetFill(mp[cp], product->fill));
7288       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7289       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7290       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7291       mp[cp]->product->api_user = product->api_user;
7292       PetscCall(MatProductSetFromOptions(mp[cp]));
7293       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7294       rmapt[cp] = 1;
7295       cmapt[cp] = 2;
7296       cmapa[cp] = p->garray;
7297       mptmp[cp] = PETSC_FALSE;
7298       cp++;
7299     }
7300 
7301     /* A_off * P_other */
7302     if (mmdata->P_oth) {
7303       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
7304       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7305       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7306       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7307       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7308       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7309       PetscCall(MatProductSetFill(mp[cp], product->fill));
7310       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7311       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7312       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7313       mp[cp]->product->api_user = product->api_user;
7314       PetscCall(MatProductSetFromOptions(mp[cp]));
7315       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7316       rmapt[cp] = 1;
7317       cmapt[cp] = 2;
7318       cmapa[cp] = P_oth_idx;
7319       mptmp[cp] = PETSC_FALSE;
7320       cp++;
7321     }
7322     break;
7323 
7324   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7325     /* A is product->B */
7326     PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7327     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7328       PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
7329       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7330       PetscCall(MatProductSetFill(mp[cp], product->fill));
7331       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7332       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7333       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7334       mp[cp]->product->api_user = product->api_user;
7335       PetscCall(MatProductSetFromOptions(mp[cp]));
7336       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7337       PetscCall(ISGetIndices(glob, &globidx));
7338       rmapt[cp] = 2;
7339       rmapa[cp] = globidx;
7340       cmapt[cp] = 2;
7341       cmapa[cp] = globidx;
7342       mptmp[cp] = PETSC_FALSE;
7343       cp++;
7344     } else {
7345       PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
7346       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7347       PetscCall(MatProductSetFill(mp[cp], product->fill));
7348       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7349       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7350       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7351       mp[cp]->product->api_user = product->api_user;
7352       PetscCall(MatProductSetFromOptions(mp[cp]));
7353       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7354       PetscCall(ISGetIndices(glob, &globidx));
7355       rmapt[cp] = 1;
7356       cmapt[cp] = 2;
7357       cmapa[cp] = globidx;
7358       mptmp[cp] = PETSC_FALSE;
7359       cp++;
7360       PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
7361       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7362       PetscCall(MatProductSetFill(mp[cp], product->fill));
7363       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7364       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7365       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7366       mp[cp]->product->api_user = product->api_user;
7367       PetscCall(MatProductSetFromOptions(mp[cp]));
7368       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7369       rmapt[cp] = 2;
7370       rmapa[cp] = p->garray;
7371       cmapt[cp] = 2;
7372       cmapa[cp] = globidx;
7373       mptmp[cp] = PETSC_FALSE;
7374       cp++;
7375     }
7376     break;
7377   case MATPRODUCT_PtAP:
7378     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7379     /* P is product->B */
7380     PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7381     PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7382     PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
7383     PetscCall(MatProductSetFill(mp[cp], product->fill));
7384     PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7385     PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7386     PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7387     mp[cp]->product->api_user = product->api_user;
7388     PetscCall(MatProductSetFromOptions(mp[cp]));
7389     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7390     PetscCall(ISGetIndices(glob, &globidx));
7391     rmapt[cp] = 2;
7392     rmapa[cp] = globidx;
7393     cmapt[cp] = 2;
7394     cmapa[cp] = globidx;
7395     mptmp[cp] = PETSC_FALSE;
7396     cp++;
7397     if (mmdata->P_oth) {
7398       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
7399       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7400       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7401       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7402       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7403       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7404       PetscCall(MatProductSetFill(mp[cp], product->fill));
7405       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7406       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7407       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7408       mp[cp]->product->api_user = product->api_user;
7409       PetscCall(MatProductSetFromOptions(mp[cp]));
7410       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7411       mptmp[cp] = PETSC_TRUE;
7412       cp++;
7413       PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
7414       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7415       PetscCall(MatProductSetFill(mp[cp], product->fill));
7416       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7417       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7418       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7419       mp[cp]->product->api_user = product->api_user;
7420       PetscCall(MatProductSetFromOptions(mp[cp]));
7421       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7422       rmapt[cp] = 2;
7423       rmapa[cp] = globidx;
7424       cmapt[cp] = 2;
7425       cmapa[cp] = P_oth_idx;
7426       mptmp[cp] = PETSC_FALSE;
7427       cp++;
7428     }
7429     break;
7430   default:
7431     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7432   }
7433   /* sanity check */
7434   if (size > 1)
7435     for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);
7436 
7437   PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
7438   for (i = 0; i < cp; i++) {
7439     mmdata->mp[i]    = mp[i];
7440     mmdata->mptmp[i] = mptmp[i];
7441   }
7442   mmdata->cp             = cp;
7443   C->product->data       = mmdata;
7444   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7445   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7446 
7447   /* memory type */
7448   mmdata->mtype = PETSC_MEMTYPE_HOST;
7449   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
7450   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
7451   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
7452   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7453   else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
7454   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7455 
7456   /* prepare coo coordinates for values insertion */
7457 
7458   /* count total nonzeros of those intermediate seqaij Mats
7459     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7460     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7461     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7462   */
7463   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7464     Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
7465     if (mptmp[cp]) continue;
7466     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
7467       const PetscInt *rmap = rmapa[cp];
7468       const PetscInt  mr   = mp[cp]->rmap->n;
7469       const PetscInt  rs   = C->rmap->rstart;
7470       const PetscInt  re   = C->rmap->rend;
7471       const PetscInt *ii   = mm->i;
7472       for (i = 0; i < mr; i++) {
7473         const PetscInt gr = rmap[i];
7474         const PetscInt nz = ii[i + 1] - ii[i];
7475         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7476         else ncoo_oown += nz;                  /* this row is local */
7477       }
7478     } else ncoo_d += mm->nz;
7479   }
7480 
7481   /*
7482     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7483 
7484     ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.
7485 
7486     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].
7487 
7488     off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
7489     own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
7490     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7491 
7492     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7493     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
7494   */
7495   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
7496   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));
7497 
7498   /* gather (i,j) of nonzeros inserted by remote procs */
7499   if (hasoffproc) {
7500     PetscSF  msf;
7501     PetscInt ncoo2, *coo_i2, *coo_j2;
7502 
7503     PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
7504     PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
7505     PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */
7506 
7507     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7508       Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
7509       PetscInt   *idxoff = mmdata->off[cp];
7510       PetscInt   *idxown = mmdata->own[cp];
7511       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7512         const PetscInt *rmap = rmapa[cp];
7513         const PetscInt *cmap = cmapa[cp];
7514         const PetscInt *ii   = mm->i;
7515         PetscInt       *coi  = coo_i + ncoo_o;
7516         PetscInt       *coj  = coo_j + ncoo_o;
7517         const PetscInt  mr   = mp[cp]->rmap->n;
7518         const PetscInt  rs   = C->rmap->rstart;
7519         const PetscInt  re   = C->rmap->rend;
7520         const PetscInt  cs   = C->cmap->rstart;
7521         for (i = 0; i < mr; i++) {
7522           const PetscInt *jj = mm->j + ii[i];
7523           const PetscInt  gr = rmap[i];
7524           const PetscInt  nz = ii[i + 1] - ii[i];
7525           if (gr < rs || gr >= re) { /* this is an offproc row */
7526             for (j = ii[i]; j < ii[i + 1]; j++) {
7527               *coi++    = gr;
7528               *idxoff++ = j;
7529             }
7530             if (!cmapt[cp]) { /* already global */
7531               for (j = 0; j < nz; j++) *coj++ = jj[j];
7532             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7533               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7534             } else { /* offdiag */
7535               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7536             }
7537             ncoo_o += nz;
7538           } else { /* this is a local row */
7539             for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
7540           }
7541         }
7542       }
7543       mmdata->off[cp + 1] = idxoff;
7544       mmdata->own[cp + 1] = idxown;
7545     }
7546 
7547     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7548     PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
7549     PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
7550     PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
7551     ncoo = ncoo_d + ncoo_oown + ncoo2;
7552     PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
7553     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7554     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
7555     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7556     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7557     PetscCall(PetscFree2(coo_i, coo_j));
7558     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7559     PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
7560     coo_i = coo_i2;
7561     coo_j = coo_j2;
7562   } else { /* no offproc values insertion */
7563     ncoo = ncoo_d;
7564     PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));
7565 
7566     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7567     PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
7568     PetscCall(PetscSFSetUp(mmdata->sf));
7569   }
7570   mmdata->hasoffproc = hasoffproc;
7571 
7572   /* gather (i,j) of nonzeros inserted locally */
7573   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7574     Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
7575     PetscInt       *coi  = coo_i + ncoo_d;
7576     PetscInt       *coj  = coo_j + ncoo_d;
7577     const PetscInt *jj   = mm->j;
7578     const PetscInt *ii   = mm->i;
7579     const PetscInt *cmap = cmapa[cp];
7580     const PetscInt *rmap = rmapa[cp];
7581     const PetscInt  mr   = mp[cp]->rmap->n;
7582     const PetscInt  rs   = C->rmap->rstart;
7583     const PetscInt  re   = C->rmap->rend;
7584     const PetscInt  cs   = C->cmap->rstart;
7585 
7586     if (mptmp[cp]) continue;
7587     if (rmapt[cp] == 1) { /* consecutive rows */
7588       /* fill coo_i */
7589       for (i = 0; i < mr; i++) {
7590         const PetscInt gr = i + rs;
7591         for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
7592       }
7593       /* fill coo_j */
7594       if (!cmapt[cp]) { /* type-0, already global */
7595         PetscCall(PetscArraycpy(coj, jj, mm->nz));
7596       } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
7597         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7598       } else {                                            /* type-2, local to global for sparse columns */
7599         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7600       }
7601       ncoo_d += mm->nz;
7602     } else if (rmapt[cp] == 2) { /* sparse rows */
7603       for (i = 0; i < mr; i++) {
7604         const PetscInt *jj = mm->j + ii[i];
7605         const PetscInt  gr = rmap[i];
7606         const PetscInt  nz = ii[i + 1] - ii[i];
7607         if (gr >= rs && gr < re) { /* local rows */
7608           for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
7609           if (!cmapt[cp]) { /* type-0, already global */
7610             for (j = 0; j < nz; j++) *coj++ = jj[j];
7611           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7612             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7613           } else { /* type-2, local to global for sparse columns */
7614             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7615           }
7616           ncoo_d += nz;
7617         }
7618       }
7619     }
7620   }
7621   if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
7622   PetscCall(ISDestroy(&glob));
7623   if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
7624   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7625   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7626   PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));
7627 
7628   /* preallocate with COO data */
7629   PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
7630   PetscCall(PetscFree2(coo_i, coo_j));
7631   PetscFunctionReturn(PETSC_SUCCESS);
7632 }
7633 
7634 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7635 {
7636   Mat_Product *product = mat->product;
7637 #if defined(PETSC_HAVE_DEVICE)
7638   PetscBool match  = PETSC_FALSE;
7639   PetscBool usecpu = PETSC_FALSE;
7640 #else
7641   PetscBool match = PETSC_TRUE;
7642 #endif
7643 
7644   PetscFunctionBegin;
7645   MatCheckProduct(mat, 1);
7646 #if defined(PETSC_HAVE_DEVICE)
7647   if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
7648   if (match) { /* we can always fallback to the CPU if requested */
7649     switch (product->type) {
7650     case MATPRODUCT_AB:
7651       if (product->api_user) {
7652         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
7653         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7654         PetscOptionsEnd();
7655       } else {
7656         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
7657         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7658         PetscOptionsEnd();
7659       }
7660       break;
7661     case MATPRODUCT_AtB:
7662       if (product->api_user) {
7663         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
7664         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7665         PetscOptionsEnd();
7666       } else {
7667         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
7668         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7669         PetscOptionsEnd();
7670       }
7671       break;
7672     case MATPRODUCT_PtAP:
7673       if (product->api_user) {
7674         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
7675         PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7676         PetscOptionsEnd();
7677       } else {
7678         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
7679         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7680         PetscOptionsEnd();
7681       }
7682       break;
7683     default:
7684       break;
7685     }
7686     match = (PetscBool)!usecpu;
7687   }
7688 #endif
7689   if (match) {
7690     switch (product->type) {
7691     case MATPRODUCT_AB:
7692     case MATPRODUCT_AtB:
7693     case MATPRODUCT_PtAP:
7694       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7695       break;
7696     default:
7697       break;
7698     }
7699   }
7700   /* fallback to MPIAIJ ops */
7701   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7702   PetscFunctionReturn(PETSC_SUCCESS);
7703 }
7704 
7705 /*
7706    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7707 
7708    n - the number of block indices in cc[]
7709    cc - the block indices (must be large enough to contain the indices)
7710 */
7711 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7712 {
7713   PetscInt        cnt = -1, nidx, j;
7714   const PetscInt *idx;
7715 
7716   PetscFunctionBegin;
7717   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7718   if (nidx) {
7719     cnt     = 0;
7720     cc[cnt] = idx[0] / bs;
7721     for (j = 1; j < nidx; j++) {
7722       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7723     }
7724   }
7725   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7726   *n = cnt + 1;
7727   PetscFunctionReturn(PETSC_SUCCESS);
7728 }
7729 
7730 /*
7731     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7732 
7733     ncollapsed - the number of block indices
7734     collapsed - the block indices (must be large enough to contain the indices)
7735 */
7736 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7737 {
7738   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7739 
7740   PetscFunctionBegin;
7741   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7742   for (i = start + 1; i < start + bs; i++) {
7743     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7744     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7745     cprevtmp = cprev;
7746     cprev    = merged;
7747     merged   = cprevtmp;
7748   }
7749   *ncollapsed = nprev;
7750   if (collapsed) *collapsed = cprev;
7751   PetscFunctionReturn(PETSC_SUCCESS);
7752 }
7753 
7754 /*
7755  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7756 
7757  Input Parameter:
7758  . Amat - matrix
7759  - symmetrize - make the result symmetric
7760  + scale - scale with diagonal
7761 
7762  Output Parameter:
7763  . a_Gmat - output scalar graph >= 0
7764 
7765 */
7766 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat)
7767 {
7768   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7769   MPI_Comm  comm;
7770   Mat       Gmat;
7771   PetscBool ismpiaij, isseqaij;
7772   Mat       a, b, c;
7773   MatType   jtype;
7774 
7775   PetscFunctionBegin;
7776   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7777   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7778   PetscCall(MatGetSize(Amat, &MM, &NN));
7779   PetscCall(MatGetBlockSize(Amat, &bs));
7780   nloc = (Iend - Istart) / bs;
7781 
7782   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7783   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7784   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7785 
7786   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7787   /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
7788      implementation */
7789   if (bs > 1) {
7790     PetscCall(MatGetType(Amat, &jtype));
7791     PetscCall(MatCreate(comm, &Gmat));
7792     PetscCall(MatSetType(Gmat, jtype));
7793     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7794     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7795     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7796       PetscInt  *d_nnz, *o_nnz;
7797       MatScalar *aa, val, *AA;
7798       PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
7799       if (isseqaij) {
7800         a = Amat;
7801         b = NULL;
7802       } else {
7803         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7804         a             = d->A;
7805         b             = d->B;
7806       }
7807       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7808       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7809       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7810         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
7811         const PetscInt *cols1, *cols2;
7812         for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
7813           PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
7814           nnz[brow / bs] = nc2 / bs;
7815           if (nc2 % bs) ok = 0;
7816           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
7817           for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
7818             PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
7819             if (nc1 != nc2) ok = 0;
7820             else {
7821               for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
7822                 if (cols1[jj] != cols2[jj]) ok = 0;
7823                 if (cols1[jj] % bs != jj % bs) ok = 0;
7824               }
7825             }
7826             PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
7827           }
7828           PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
7829           if (!ok) {
7830             PetscCall(PetscFree2(d_nnz, o_nnz));
7831             PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
7832             goto old_bs;
7833           }
7834         }
7835       }
7836       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7837       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7838       PetscCall(PetscFree2(d_nnz, o_nnz));
7839       PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
7840       // diag
7841       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
7842         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
7843         ai               = aseq->i;
7844         n                = ai[brow + 1] - ai[brow];
7845         aj               = aseq->j + ai[brow];
7846         for (int k = 0; k < n; k += bs) {        // block columns
7847           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
7848           val        = 0;
7849           if (index_size == 0) {
7850             for (int ii = 0; ii < bs; ii++) { // rows in block
7851               aa = aseq->a + ai[brow + ii] + k;
7852               for (int jj = 0; jj < bs; jj++) {         // columns in block
7853                 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7854               }
7855             }
7856           } else {                                       // use (index,index) value if provided
7857             for (int iii = 0; iii < index_size; iii++) { // rows in block
7858               int ii = index[iii];
7859               aa     = aseq->a + ai[brow + ii] + k;
7860               for (int jjj = 0; jjj < index_size; jjj++) { // columns in block
7861                 int jj = index[jjj];
7862                 val += PetscAbs(PetscRealPart(aa[jj]));
7863               }
7864             }
7865           }
7866           PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7867           AA[k / bs] = val;
7868         }
7869         grow = Istart / bs + brow / bs;
7870         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES));
7871       }
7872       // off-diag
7873       if (ismpiaij) {
7874         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
7875         const PetscScalar *vals;
7876         const PetscInt    *cols, *garray = aij->garray;
7877         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
7878         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
7879           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
7880           for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
7881             PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
7882             AA[k / bs] = 0;
7883             AJ[cidx]   = garray[cols[k]] / bs;
7884           }
7885           nc = ncols / bs;
7886           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
7887           if (index_size == 0) {
7888             for (int ii = 0; ii < bs; ii++) { // rows in block
7889               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7890               for (int k = 0; k < ncols; k += bs) {
7891                 for (int jj = 0; jj < bs; jj++) { // cols in block
7892                   PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7893                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7894                 }
7895               }
7896               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7897             }
7898           } else {                                       // use (index,index) value if provided
7899             for (int iii = 0; iii < index_size; iii++) { // rows in block
7900               int ii = index[iii];
7901               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7902               for (int k = 0; k < ncols; k += bs) {
7903                 for (int jjj = 0; jjj < index_size; jjj++) { // cols in block
7904                   int jj = index[jjj];
7905                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7906                 }
7907               }
7908               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7909             }
7910           }
7911           grow = Istart / bs + brow / bs;
7912           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES));
7913         }
7914       }
7915       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7916       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7917       PetscCall(PetscFree2(AA, AJ));
7918     } else {
7919       const PetscScalar *vals;
7920       const PetscInt    *idx;
7921       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
7922     old_bs:
7923       /*
7924        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7925        */
7926       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
7927       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7928       if (isseqaij) {
7929         PetscInt max_d_nnz;
7930         /*
7931          Determine exact preallocation count for (sequential) scalar matrix
7932          */
7933         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
7934         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7935         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7936         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7937         PetscCall(PetscFree3(w0, w1, w2));
7938       } else if (ismpiaij) {
7939         Mat             Daij, Oaij;
7940         const PetscInt *garray;
7941         PetscInt        max_d_nnz;
7942         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
7943         /*
7944          Determine exact preallocation count for diagonal block portion of scalar matrix
7945          */
7946         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
7947         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7948         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7949         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7950         PetscCall(PetscFree3(w0, w1, w2));
7951         /*
7952          Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
7953          */
7954         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7955           o_nnz[jj] = 0;
7956           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
7957             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7958             o_nnz[jj] += ncols;
7959             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7960           }
7961           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
7962         }
7963       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
7964       /* get scalar copy (norms) of matrix */
7965       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7966       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7967       PetscCall(PetscFree2(d_nnz, o_nnz));
7968       for (Ii = Istart; Ii < Iend; Ii++) {
7969         PetscInt dest_row = Ii / bs;
7970         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
7971         for (jj = 0; jj < ncols; jj++) {
7972           PetscInt    dest_col = idx[jj] / bs;
7973           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
7974           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
7975         }
7976         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
7977       }
7978       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7979       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7980     }
7981   } else {
7982     if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
7983     else {
7984       Gmat = Amat;
7985       PetscCall(PetscObjectReference((PetscObject)Gmat));
7986     }
7987     if (isseqaij) {
7988       a = Gmat;
7989       b = NULL;
7990     } else {
7991       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
7992       a             = d->A;
7993       b             = d->B;
7994     }
7995     if (filter >= 0 || scale) {
7996       /* take absolute value of each entry */
7997       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7998         MatInfo      info;
7999         PetscScalar *avals;
8000         PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
8001         PetscCall(MatSeqAIJGetArray(c, &avals));
8002         for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
8003         PetscCall(MatSeqAIJRestoreArray(c, &avals));
8004       }
8005     }
8006   }
8007   if (symmetrize) {
8008     PetscBool isset, issym;
8009     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
8010     if (!isset || !issym) {
8011       Mat matTrans;
8012       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
8013       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
8014       PetscCall(MatDestroy(&matTrans));
8015     }
8016     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
8017   } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
8018   if (scale) {
8019     /* scale c for all diagonal values = 1 or -1 */
8020     Vec diag;
8021     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
8022     PetscCall(MatGetDiagonal(Gmat, diag));
8023     PetscCall(VecReciprocal(diag));
8024     PetscCall(VecSqrtAbs(diag));
8025     PetscCall(MatDiagonalScale(Gmat, diag, diag));
8026     PetscCall(VecDestroy(&diag));
8027   }
8028   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
8029 
8030   if (filter >= 0) {
8031     PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
8032     PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
8033   }
8034   *a_Gmat = Gmat;
8035   PetscFunctionReturn(PETSC_SUCCESS);
8036 }
8037 
8038 /*
8039     Special version for direct calls from Fortran
8040 */
8041 
8042 /* Change these macros so can be used in void function */
8043 /* Identical to PetscCallVoid, except it assigns to *_ierr */
8044 #undef PetscCall
8045 #define PetscCall(...) \
8046   do { \
8047     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
8048     if (PetscUnlikely(ierr_msv_mpiaij)) { \
8049       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
8050       return; \
8051     } \
8052   } while (0)
8053 
8054 #undef SETERRQ
8055 #define SETERRQ(comm, ierr, ...) \
8056   do { \
8057     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
8058     return; \
8059   } while (0)
8060 
8061 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8062   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8063 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8064   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8065 #else
8066 #endif
8067 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
8068 {
8069   Mat         mat = *mmat;
8070   PetscInt    m = *mm, n = *mn;
8071   InsertMode  addv = *maddv;
8072   Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
8073   PetscScalar value;
8074 
8075   MatCheckPreallocated(mat, 1);
8076   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
8077   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
8078   {
8079     PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
8080     PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
8081     PetscBool roworiented = aij->roworiented;
8082 
8083     /* Some Variables required in the macro */
8084     Mat         A     = aij->A;
8085     Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
8086     PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
8087     MatScalar  *aa;
8088     PetscBool   ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
8089     Mat         B                 = aij->B;
8090     Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
8091     PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
8092     MatScalar  *ba;
8093     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
8094      * cannot use "#if defined" inside a macro. */
8095     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
8096 
8097     PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
8098     PetscInt   nonew = a->nonew;
8099     MatScalar *ap1, *ap2;
8100 
8101     PetscFunctionBegin;
8102     PetscCall(MatSeqAIJGetArray(A, &aa));
8103     PetscCall(MatSeqAIJGetArray(B, &ba));
8104     for (i = 0; i < m; i++) {
8105       if (im[i] < 0) continue;
8106       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
8107       if (im[i] >= rstart && im[i] < rend) {
8108         row      = im[i] - rstart;
8109         lastcol1 = -1;
8110         rp1      = aj + ai[row];
8111         ap1      = aa + ai[row];
8112         rmax1    = aimax[row];
8113         nrow1    = ailen[row];
8114         low1     = 0;
8115         high1    = nrow1;
8116         lastcol2 = -1;
8117         rp2      = bj + bi[row];
8118         ap2      = ba + bi[row];
8119         rmax2    = bimax[row];
8120         nrow2    = bilen[row];
8121         low2     = 0;
8122         high2    = nrow2;
8123 
8124         for (j = 0; j < n; j++) {
8125           if (roworiented) value = v[i * n + j];
8126           else value = v[i + j * m];
8127           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
8128           if (in[j] >= cstart && in[j] < cend) {
8129             col = in[j] - cstart;
8130             MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
8131           } else if (in[j] < 0) continue;
8132           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
8133             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
8134           } else {
8135             if (mat->was_assembled) {
8136               if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
8137 #if defined(PETSC_USE_CTABLE)
8138               PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
8139               col--;
8140 #else
8141               col = aij->colmap[in[j]] - 1;
8142 #endif
8143               if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) {
8144                 PetscCall(MatDisAssemble_MPIAIJ(mat));
8145                 col = in[j];
8146                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
8147                 B        = aij->B;
8148                 b        = (Mat_SeqAIJ *)B->data;
8149                 bimax    = b->imax;
8150                 bi       = b->i;
8151                 bilen    = b->ilen;
8152                 bj       = b->j;
8153                 rp2      = bj + bi[row];
8154                 ap2      = ba + bi[row];
8155                 rmax2    = bimax[row];
8156                 nrow2    = bilen[row];
8157                 low2     = 0;
8158                 high2    = nrow2;
8159                 bm       = aij->B->rmap->n;
8160                 ba       = b->a;
8161                 inserted = PETSC_FALSE;
8162               }
8163             } else col = in[j];
8164             MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
8165           }
8166         }
8167       } else if (!aij->donotstash) {
8168         if (roworiented) {
8169           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8170         } else {
8171           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8172         }
8173       }
8174     }
8175     PetscCall(MatSeqAIJRestoreArray(A, &aa));
8176     PetscCall(MatSeqAIJRestoreArray(B, &ba));
8177   }
8178   PetscFunctionReturnVoid();
8179 }
8180 
8181 /* Undefining these here since they were redefined from their original definition above! No
8182  * other PETSc functions should be defined past this point, as it is impossible to recover the
8183  * original definitions */
8184 #undef PetscCall
8185 #undef SETERRQ
8186