xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision f13dfd9ea68e0ddeee984e65c377a1819eab8a8a)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
10 {
11   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
12 
13   PetscFunctionBegin;
14   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
15   PetscCall(MatStashDestroy_Private(&mat->stash));
16   PetscCall(VecDestroy(&aij->diag));
17   PetscCall(MatDestroy(&aij->A));
18   PetscCall(MatDestroy(&aij->B));
19 #if defined(PETSC_USE_CTABLE)
20   PetscCall(PetscHMapIDestroy(&aij->colmap));
21 #else
22   PetscCall(PetscFree(aij->colmap));
23 #endif
24   PetscCall(PetscFree(aij->garray));
25   PetscCall(VecDestroy(&aij->lvec));
26   PetscCall(VecScatterDestroy(&aij->Mvctx));
27   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
28   PetscCall(PetscFree(aij->ld));
29 
30   PetscCall(PetscFree(mat->data));
31 
32   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
33   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
34 
35   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
36   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
37   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
38   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
39   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
40   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
41   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
42   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
43   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
44   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
45 #if defined(PETSC_HAVE_CUDA)
46   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
47 #endif
48 #if defined(PETSC_HAVE_HIP)
49   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
50 #endif
51 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
52   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
53 #endif
54   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
55 #if defined(PETSC_HAVE_ELEMENTAL)
56   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
57 #endif
58 #if defined(PETSC_HAVE_SCALAPACK)
59   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
60 #endif
61 #if defined(PETSC_HAVE_HYPRE)
62   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
63   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
64 #endif
65   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
66   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
69   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
71 #if defined(PETSC_HAVE_MKL_SPARSE)
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
73 #endif
74   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
75   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
76   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
77   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
78   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
79   PetscFunctionReturn(PETSC_SUCCESS);
80 }
81 
82 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and  MatAssemblyEnd_MPI_Hash() */
83 #define TYPE AIJ
84 #define TYPE_AIJ
85 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
86 #undef TYPE
87 #undef TYPE_AIJ
88 
89 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
90 {
91   Mat B;
92 
93   PetscFunctionBegin;
94   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
95   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
96   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
97   PetscCall(MatDestroy(&B));
98   PetscFunctionReturn(PETSC_SUCCESS);
99 }
100 
101 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
102 {
103   Mat B;
104 
105   PetscFunctionBegin;
106   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
107   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
108   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
109   PetscFunctionReturn(PETSC_SUCCESS);
110 }
111 
112 /*MC
113    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
114 
115    This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
116    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
117   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
118   for communicators controlling multiple processes.  It is recommended that you call both of
119   the above preallocation routines for simplicity.
120 
121    Options Database Key:
122 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
123 
124   Level: beginner
125 
126   Developer Note:
127     Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`, and also automatically switches over to use inodes when
128    enough exist.
129 
130 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
131 M*/
132 
133 /*MC
134    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
135 
136    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
137    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
138    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
139   for communicators controlling multiple processes.  It is recommended that you call both of
140   the above preallocation routines for simplicity.
141 
142    Options Database Key:
143 . -mat_type aijcrl - sets the matrix type to `MATAIJCRL` during a call to `MatSetFromOptions()`
144 
145   Level: beginner
146 
147 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
148 M*/
149 
150 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
151 {
152   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
153 
154   PetscFunctionBegin;
155 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
156   A->boundtocpu = flg;
157 #endif
158   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
159   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
160 
161   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
162    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
163    * to differ from the parent matrix. */
164   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
165   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
166   PetscFunctionReturn(PETSC_SUCCESS);
167 }
168 
169 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
170 {
171   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
172 
173   PetscFunctionBegin;
174   if (mat->A) {
175     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
176     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
177   }
178   PetscFunctionReturn(PETSC_SUCCESS);
179 }
180 
181 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
182 {
183   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
184   Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
185   Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
186   const PetscInt  *ia, *ib;
187   const MatScalar *aa, *bb, *aav, *bav;
188   PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
189   PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;
190 
191   PetscFunctionBegin;
192   *keptrows = NULL;
193 
194   ia = a->i;
195   ib = b->i;
196   PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
197   PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
198   for (i = 0; i < m; i++) {
199     na = ia[i + 1] - ia[i];
200     nb = ib[i + 1] - ib[i];
201     if (!na && !nb) {
202       cnt++;
203       goto ok1;
204     }
205     aa = aav + ia[i];
206     for (j = 0; j < na; j++) {
207       if (aa[j] != 0.0) goto ok1;
208     }
209     bb = PetscSafePointerPlusOffset(bav, ib[i]);
210     for (j = 0; j < nb; j++) {
211       if (bb[j] != 0.0) goto ok1;
212     }
213     cnt++;
214   ok1:;
215   }
216   PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
217   if (!n0rows) {
218     PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
219     PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
220     PetscFunctionReturn(PETSC_SUCCESS);
221   }
222   PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
223   cnt = 0;
224   for (i = 0; i < m; i++) {
225     na = ia[i + 1] - ia[i];
226     nb = ib[i + 1] - ib[i];
227     if (!na && !nb) continue;
228     aa = aav + ia[i];
229     for (j = 0; j < na; j++) {
230       if (aa[j] != 0.0) {
231         rows[cnt++] = rstart + i;
232         goto ok2;
233       }
234     }
235     bb = PetscSafePointerPlusOffset(bav, ib[i]);
236     for (j = 0; j < nb; j++) {
237       if (bb[j] != 0.0) {
238         rows[cnt++] = rstart + i;
239         goto ok2;
240       }
241     }
242   ok2:;
243   }
244   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
245   PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
246   PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
247   PetscFunctionReturn(PETSC_SUCCESS);
248 }
249 
250 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
251 {
252   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
253   PetscBool   cong;
254 
255   PetscFunctionBegin;
256   PetscCall(MatHasCongruentLayouts(Y, &cong));
257   if (Y->assembled && cong) {
258     PetscCall(MatDiagonalSet(aij->A, D, is));
259   } else {
260     PetscCall(MatDiagonalSet_Default(Y, D, is));
261   }
262   PetscFunctionReturn(PETSC_SUCCESS);
263 }
264 
265 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
266 {
267   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
268   PetscInt    i, rstart, nrows, *rows;
269 
270   PetscFunctionBegin;
271   *zrows = NULL;
272   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
273   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
274   for (i = 0; i < nrows; i++) rows[i] += rstart;
275   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
276   PetscFunctionReturn(PETSC_SUCCESS);
277 }
278 
279 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
280 {
281   Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
282   PetscInt           i, m, n, *garray = aij->garray;
283   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
284   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
285   PetscReal         *work;
286   const PetscScalar *dummy;
287 
288   PetscFunctionBegin;
289   PetscCall(MatGetSize(A, &m, &n));
290   PetscCall(PetscCalloc1(n, &work));
291   PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
292   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
293   PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
294   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
295   if (type == NORM_2) {
296     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
297     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
298   } else if (type == NORM_1) {
299     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
300     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
301   } else if (type == NORM_INFINITY) {
302     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
303     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
304   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
305     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
306     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
307   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
308     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
309     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
310   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
311   if (type == NORM_INFINITY) {
312     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
313   } else {
314     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
315   }
316   PetscCall(PetscFree(work));
317   if (type == NORM_2) {
318     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
319   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
320     for (i = 0; i < n; i++) reductions[i] /= m;
321   }
322   PetscFunctionReturn(PETSC_SUCCESS);
323 }
324 
325 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
326 {
327   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
328   IS              sis, gis;
329   const PetscInt *isis, *igis;
330   PetscInt        n, *iis, nsis, ngis, rstart, i;
331 
332   PetscFunctionBegin;
333   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
334   PetscCall(MatFindNonzeroRows(a->B, &gis));
335   PetscCall(ISGetSize(gis, &ngis));
336   PetscCall(ISGetSize(sis, &nsis));
337   PetscCall(ISGetIndices(sis, &isis));
338   PetscCall(ISGetIndices(gis, &igis));
339 
340   PetscCall(PetscMalloc1(ngis + nsis, &iis));
341   PetscCall(PetscArraycpy(iis, igis, ngis));
342   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
343   n = ngis + nsis;
344   PetscCall(PetscSortRemoveDupsInt(&n, iis));
345   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
346   for (i = 0; i < n; i++) iis[i] += rstart;
347   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
348 
349   PetscCall(ISRestoreIndices(sis, &isis));
350   PetscCall(ISRestoreIndices(gis, &igis));
351   PetscCall(ISDestroy(&sis));
352   PetscCall(ISDestroy(&gis));
353   PetscFunctionReturn(PETSC_SUCCESS);
354 }
355 
356 /*
357   Local utility routine that creates a mapping from the global column
358 number to the local number in the off-diagonal part of the local
359 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
360 a slightly higher hash table cost; without it it is not scalable (each processor
361 has an order N integer array but is fast to access.
362 */
363 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
364 {
365   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
366   PetscInt    n   = aij->B->cmap->n, i;
367 
368   PetscFunctionBegin;
369   PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
370 #if defined(PETSC_USE_CTABLE)
371   PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
372   for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
373 #else
374   PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
375   for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
376 #endif
377   PetscFunctionReturn(PETSC_SUCCESS);
378 }
379 
/*
  MatSetValues_SeqAIJ_A_Private - Inserts or adds one value into a row of the diagonal block.

  Macro arguments:
    row, col   - row and column of the entry, in the numbering used by the row arrays rp1/ap1
    value      - the value to insert or add
    addv       - ADD_VALUES adds to an existing entry, anything else overwrites it
    orow, ocol - row/column numbers used only in the error message

  The macro is not self-contained; it reads and updates these names from the enclosing scope:
  rp1, ap1 (column indices and values of the row), nrow1 (entries in use), rmax1, low1, high1, lastcol1,
  t, _i, N, nonew, ignorezeroentries, and A, a, am, aa, ai, aj, aimax, ailen.

  Steps:
  1. Pick the search window [low1, high1): when col is not larger than the previously used column the
     window restarts at 0, otherwise it is extended to the end of the row. The window is halved until at
     most 5 candidates remain and those are scanned linearly.
  2. If col is found the value is added or overwritten.
  3. Otherwise nothing is inserted when value is 0.0, ignorezeroentries is set and row != col, or when
     nonew == 1; nonew == -1 raises an error.
  4. Otherwise MatSeqXAIJReallocateAIJ() is invoked (presumably to grow the storage when the row is full -
     TODO confirm), the later entries of the row are shifted up by one and the new entry is stored at _i.
  5. In every case ailen[row] is updated to nrow1.
*/
380 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
381   do { \
382     if (col <= lastcol1) low1 = 0; \
383     else high1 = nrow1; \
384     lastcol1 = col; \
385     while (high1 - low1 > 5) { \
386       t = (low1 + high1) / 2; \
387       if (rp1[t] > col) high1 = t; \
388       else low1 = t; \
389     } \
390     for (_i = low1; _i < high1; _i++) { \
391       if (rp1[_i] > col) break; \
392       if (rp1[_i] == col) { \
393         if (addv == ADD_VALUES) { \
394           ap1[_i] += value; \
395           /* Not sure whether LogFlops will slow down the code or not */ \
396           (void)PetscLogFlops(1.0); \
397         } else ap1[_i] = value; \
398         goto a_noinsert; \
399       } \
400     } \
401     if (value == 0.0 && ignorezeroentries && row != col) { \
402       low1  = 0; \
403       high1 = nrow1; \
404       goto a_noinsert; \
405     } \
406     if (nonew == 1) { \
407       low1  = 0; \
408       high1 = nrow1; \
409       goto a_noinsert; \
410     } \
411     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
412     MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
413     N = nrow1++ - 1; \
414     a->nz++; \
415     high1++; \
416     /* shift up all the later entries in this row */ \
417     PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
418     PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
419     rp1[_i] = col; \
420     ap1[_i] = value; \
421   a_noinsert:; \
422     ailen[row] = nrow1; \
423   } while (0)
424 
/*
  MatSetValues_SeqAIJ_B_Private - Inserts or adds one value into a row of the off-diagonal block.

  Macro arguments:
    row, col   - row and column of the entry, in the numbering used by the row arrays rp2/ap2
    value      - the value to insert or add
    addv       - ADD_VALUES adds to an existing entry, anything else overwrites it
    orow, ocol - row/column numbers used only in the error message

  The macro is not self-contained; it reads and updates these names from the enclosing scope:
  rp2, ap2 (column indices and values of the row), nrow2 (entries in use), rmax2, low2, high2, lastcol2,
  t, _i, N, nonew, ignorezeroentries, and B, b, bm, ba, bi, bj, bimax, bilen.

  Steps:
  1. Pick the search window [low2, high2): when col is not larger than the previously used column the
     window restarts at 0, otherwise it is extended to the end of the row. The window is halved until at
     most 5 candidates remain and those are scanned linearly.
  2. If col is found the value is added or overwritten.
  3. Otherwise nothing is inserted when value is 0.0 and ignorezeroentries is set (there is no row != col
     exception in this macro), or when nonew == 1; nonew == -1 raises an error.
  4. Otherwise MatSeqXAIJReallocateAIJ() is invoked (presumably to grow the storage when the row is full -
     TODO confirm), the later entries of the row are shifted up by one and the new entry is stored at _i.
  5. In every case bilen[row] is updated to nrow2.
*/
425 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
426   do { \
427     if (col <= lastcol2) low2 = 0; \
428     else high2 = nrow2; \
429     lastcol2 = col; \
430     while (high2 - low2 > 5) { \
431       t = (low2 + high2) / 2; \
432       if (rp2[t] > col) high2 = t; \
433       else low2 = t; \
434     } \
435     for (_i = low2; _i < high2; _i++) { \
436       if (rp2[_i] > col) break; \
437       if (rp2[_i] == col) { \
438         if (addv == ADD_VALUES) { \
439           ap2[_i] += value; \
440           (void)PetscLogFlops(1.0); \
441         } else ap2[_i] = value; \
442         goto b_noinsert; \
443       } \
444     } \
445     if (value == 0.0 && ignorezeroentries) { \
446       low2  = 0; \
447       high2 = nrow2; \
448       goto b_noinsert; \
449     } \
450     if (nonew == 1) { \
451       low2  = 0; \
452       high2 = nrow2; \
453       goto b_noinsert; \
454     } \
455     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
456     MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
457     N = nrow2++ - 1; \
458     b->nz++; \
459     high2++; \
460     /* shift up all the later entries in this row */ \
461     PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
462     PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
463     rp2[_i] = col; \
464     ap2[_i] = value; \
465   b_noinsert:; \
466     bilen[row] = nrow2; \
467   } while (0)
468 
469 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
470 {
471   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
472   Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
473   PetscInt     l, *garray                         = mat->garray, diag;
474   PetscScalar *aa, *ba;
475 
476   PetscFunctionBegin;
477   /* code only works for square matrices A */
478 
479   /* find size of row to the left of the diagonal part */
480   PetscCall(MatGetOwnershipRange(A, &diag, NULL));
481   row = row - diag;
482   for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
483     if (garray[b->j[b->i[row] + l]] > diag) break;
484   }
485   if (l) {
486     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
487     PetscCall(PetscArraycpy(ba + b->i[row], v, l));
488     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
489   }
490 
491   /* diagonal part */
492   if (a->i[row + 1] - a->i[row]) {
493     PetscCall(MatSeqAIJGetArray(mat->A, &aa));
494     PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
495     PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
496   }
497 
498   /* right of diagonal part */
499   if (b->i[row + 1] - b->i[row] - l) {
500     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
501     PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
502     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
503   }
504   PetscFunctionReturn(PETSC_SUCCESS);
505 }
506 
/*
  MatSetValues_MPIAIJ - Inserts or adds an m-by-n block of values into an MPIAIJ matrix.

  Input Parameters:
+ mat  - the MPIAIJ matrix
. m    - number of rows
. im   - global row indices; negative rows are skipped
. n    - number of columns
. in   - global column indices; negative columns are skipped
. v    - the values, read as v[i * n + j] when the matrix is row oriented and as v[i + j * m] otherwise;
         when v is NULL the value 0.0 is used for every entry
- addv - ADD_VALUES or another insert mode

  Locally owned rows (rstart <= im[i] < rend) are written directly: columns in [cstart, cend) go to the
  diagonal block through MatSetValues_SeqAIJ_A_Private(), all other columns go to the off-diagonal block
  through MatSetValues_SeqAIJ_B_Private(). Rows owned elsewhere are put into the stash unless donotstash
  is set; it is an error to set them when mat->nooffprocentries is set.

  Many of the locals below are never named in this function's own statements because the two macros
  access them by name.
*/
507 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
508 {
509   Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
510   PetscScalar value = 0.0;
511   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
512   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
513   PetscBool   roworiented = aij->roworiented;
514 
515   /* Some Variables required in the macro */
516   Mat         A     = aij->A;
517   Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
518   PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
519   PetscBool   ignorezeroentries = a->ignorezeroentries;
520   Mat         B                 = aij->B;
521   Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
522   PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
523   MatScalar  *aa, *ba;
/* NOTE(review): ii is not referenced in this function or in the two macros above; presumably it is used
   inside MatSeqXAIJReallocateAIJ() - confirm before removing it */
524   PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
525   PetscInt    nonew;
526   MatScalar  *ap1, *ap2;
527 
528   PetscFunctionBegin;
529   PetscCall(MatSeqAIJGetArray(A, &aa));
530   PetscCall(MatSeqAIJGetArray(B, &ba));
531   for (i = 0; i < m; i++) {
532     if (im[i] < 0) continue;
533     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
534     if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the per-row state read by the two macros (suffix 1 = diagonal block A,
         suffix 2 = off-diagonal block B) */
535       row      = im[i] - rstart;
536       lastcol1 = -1;
537       rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
538       ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
539       rmax1    = aimax[row];
540       nrow1    = ailen[row];
541       low1     = 0;
542       high1    = nrow1;
543       lastcol2 = -1;
544       rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
545       ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
546       rmax2    = bimax[row];
547       nrow2    = bilen[row];
548       low2     = 0;
549       high2    = nrow2;
550 
551       for (j = 0; j < n; j++) {
552         if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        /* added zeros are dropped when zero entries are ignored, except on the diagonal */
553         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
554         if (in[j] >= cstart && in[j] < cend) {
555           col   = in[j] - cstart;
556           nonew = a->nonew;
557           MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
558         } else if (in[j] < 0) {
559           continue;
560         } else {
561           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          /* once the matrix has been assembled, the global column is translated through colmap (built on
             first use); before that, the global column number is used as is */
562           if (mat->was_assembled) {
563             if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
564 #if defined(PETSC_USE_CTABLE)
565             PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
566             col--;
567 #else
568             col = aij->colmap[in[j]] - 1;
569 #endif
570             if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
571               PetscCall(MatDisAssemble_MPIAIJ(mat));               /* Change aij->B from reduced/local format to expanded/global format */
572               col = in[j];
573               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
574               B     = aij->B;
575               b     = (Mat_SeqAIJ *)B->data;
576               bimax = b->imax;
577               bi    = b->i;
578               bilen = b->ilen;
579               bj    = b->j;
580               ba    = b->a;
581               rp2   = bj + bi[row];
582               ap2   = ba + bi[row];
583               rmax2 = bimax[row];
584               nrow2 = bilen[row];
585               low2  = 0;
586               high2 = nrow2;
587               bm    = aij->B->rmap->n;
588               ba    = b->a;
589             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              /* a new off-diagonal location that may not be created: logged when nonew is 1, an error otherwise */
590               if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) {
591                 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
592               } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
593             }
594           } else col = in[j];
595           nonew = b->nonew;
596           MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
597         }
598       }
599     } else {
      /* row owned by another process: stash the whole row of values unless stashing is disabled */
600       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
601       if (!aij->donotstash) {
602         mat->assembled = PETSC_FALSE;
603         if (roworiented) {
604           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
605         } else {
606           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
607         }
608       }
609     }
610   }
611   PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, ba might have been free'd due to reallocation above. But we don't access them here */
612   PetscCall(MatSeqAIJRestoreArray(B, &ba));
613   PetscFunctionReturn(PETSC_SUCCESS);
614 }
615 
616 /*
617     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
618     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
619     No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
620 */
621 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
622 {
623   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
624   Mat         A      = aij->A; /* diagonal part of the matrix */
625   Mat         B      = aij->B; /* off-diagonal part of the matrix */
626   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
627   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
628   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
629   PetscInt   *ailen = a->ilen, *aj = a->j;
630   PetscInt   *bilen = b->ilen, *bj = b->j;
631   PetscInt    am          = aij->A->rmap->n, j;
632   PetscInt    diag_so_far = 0, dnz;
633   PetscInt    offd_so_far = 0, onz;
634 
635   PetscFunctionBegin;
636   /* Iterate over all rows of the matrix */
637   for (j = 0; j < am; j++) {
638     dnz = onz = 0;
639     /*  Iterate over all non-zero columns of the current row */
640     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
641       /* If column is in the diagonal */
642       if (mat_j[col] >= cstart && mat_j[col] < cend) {
643         aj[diag_so_far++] = mat_j[col] - cstart;
644         dnz++;
645       } else { /* off-diagonal entries */
646         bj[offd_so_far++] = mat_j[col];
647         onz++;
648       }
649     }
650     ailen[j] = dnz;
651     bilen[j] = onz;
652   }
653   PetscFunctionReturn(PETSC_SUCCESS);
654 }
655 
656 /*
657     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
658     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
659     No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
660     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
661     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
662 */
663 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
664 {
665   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
666   Mat          A    = aij->A; /* diagonal part of the matrix */
667   Mat          B    = aij->B; /* off-diagonal part of the matrix */
668   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
669   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
670   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
671   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
672   PetscInt    *ailen = a->ilen, *aj = a->j;
673   PetscInt    *bilen = b->ilen, *bj = b->j;
674   PetscInt     am          = aij->A->rmap->n, j;
675   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
676   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
677   PetscScalar *aa = a->a, *ba = b->a;
678 
679   PetscFunctionBegin;
680   /* Iterate over all rows of the matrix */
681   for (j = 0; j < am; j++) {
682     dnz_row = onz_row = 0;
683     rowstart_offd     = full_offd_i[j];
684     rowstart_diag     = full_diag_i[j];
685     /*  Iterate over all non-zero columns of the current row */
686     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
687       /* If column is in the diagonal */
688       if (mat_j[col] >= cstart && mat_j[col] < cend) {
689         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
690         aa[rowstart_diag + dnz_row] = mat_a[col];
691         dnz_row++;
692       } else { /* off-diagonal entries */
693         bj[rowstart_offd + onz_row] = mat_j[col];
694         ba[rowstart_offd + onz_row] = mat_a[col];
695         onz_row++;
696       }
697     }
698     ailen[j] = dnz_row;
699     bilen[j] = onz_row;
700   }
701   PetscFunctionReturn(PETSC_SUCCESS);
702 }
703 
704 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
705 {
706   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
707   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
708   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
709 
710   PetscFunctionBegin;
711   for (i = 0; i < m; i++) {
712     if (idxm[i] < 0) continue; /* negative row */
713     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
714     PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
715     row = idxm[i] - rstart;
716     for (j = 0; j < n; j++) {
717       if (idxn[j] < 0) continue; /* negative column */
718       PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
719       if (idxn[j] >= cstart && idxn[j] < cend) {
720         col = idxn[j] - cstart;
721         PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
722       } else {
723         if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
724 #if defined(PETSC_USE_CTABLE)
725         PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
726         col--;
727 #else
728         col = aij->colmap[idxn[j]] - 1;
729 #endif
730         if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
731         else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
732       }
733     }
734   }
735   PetscFunctionReturn(PETSC_SUCCESS);
736 }
737 
738 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
739 {
740   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
741   PetscInt    nstash, reallocs;
742 
743   PetscFunctionBegin;
744   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
745 
746   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
747   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
748   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
749   PetscFunctionReturn(PETSC_SUCCESS);
750 }
751 
752 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
753 {
754   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
755   PetscMPIInt  n;
756   PetscInt     i, j, rstart, ncols, flg;
757   PetscInt    *row, *col;
758   PetscBool    other_disassembled;
759   PetscScalar *val;
760 
761   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
762 
763   PetscFunctionBegin;
764   if (!aij->donotstash && !mat->nooffprocentries) {
765     while (1) {
766       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
767       if (!flg) break;
768 
769       for (i = 0; i < n;) {
770         /* Now identify the consecutive vals belonging to the same row */
771         for (j = i, rstart = row[j]; j < n; j++) {
772           if (row[j] != rstart) break;
773         }
774         if (j < n) ncols = j - i;
775         else ncols = n - i;
776         /* Now assemble all these values with a single function call */
777         PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
778         i = j;
779       }
780     }
781     PetscCall(MatStashScatterEnd_Private(&mat->stash));
782   }
783 #if defined(PETSC_HAVE_DEVICE)
784   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
785   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
786   if (mat->boundtocpu) {
787     PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
788     PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
789   }
790 #endif
791   PetscCall(MatAssemblyBegin(aij->A, mode));
792   PetscCall(MatAssemblyEnd(aij->A, mode));
793 
794   /* determine if any processor has disassembled, if so we must
795      also disassemble ourself, in order that we may reassemble. */
796   /*
797      if nonzero structure of submatrix B cannot change then we know that
798      no processor disassembled thus we can skip this stuff
799   */
800   if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
801     PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
802     if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
803       PetscCall(MatDisAssemble_MPIAIJ(mat));
804     }
805   }
806   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
807   PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
808 #if defined(PETSC_HAVE_DEVICE)
809   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
810 #endif
811   PetscCall(MatAssemblyBegin(aij->B, mode));
812   PetscCall(MatAssemblyEnd(aij->B, mode));
813 
814   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
815 
816   aij->rowvalues = NULL;
817 
818   PetscCall(VecDestroy(&aij->diag));
819 
820   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
821   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
822     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
823     PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
824   }
825 #if defined(PETSC_HAVE_DEVICE)
826   mat->offloadmask = PETSC_OFFLOAD_BOTH;
827 #endif
828   PetscFunctionReturn(PETSC_SUCCESS);
829 }
830 
831 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
832 {
833   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
834 
835   PetscFunctionBegin;
836   PetscCall(MatZeroEntries(l->A));
837   PetscCall(MatZeroEntries(l->B));
838   PetscFunctionReturn(PETSC_SUCCESS);
839 }
840 
841 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
842 {
843   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
844   PetscInt   *lrows;
845   PetscInt    r, len;
846   PetscBool   cong;
847 
848   PetscFunctionBegin;
849   /* get locally owned rows */
850   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
851   PetscCall(MatHasCongruentLayouts(A, &cong));
852   /* fix right-hand side if needed */
853   if (x && b) {
854     const PetscScalar *xx;
855     PetscScalar       *bb;
856 
857     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
858     PetscCall(VecGetArrayRead(x, &xx));
859     PetscCall(VecGetArray(b, &bb));
860     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
861     PetscCall(VecRestoreArrayRead(x, &xx));
862     PetscCall(VecRestoreArray(b, &bb));
863   }
864 
865   if (diag != 0.0 && cong) {
866     PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
867     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
868   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
869     Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
870     Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
871     PetscInt    nnwA, nnwB;
872     PetscBool   nnzA, nnzB;
873 
874     nnwA = aijA->nonew;
875     nnwB = aijB->nonew;
876     nnzA = aijA->keepnonzeropattern;
877     nnzB = aijB->keepnonzeropattern;
878     if (!nnzA) {
879       PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
880       aijA->nonew = 0;
881     }
882     if (!nnzB) {
883       PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
884       aijB->nonew = 0;
885     }
886     /* Must zero here before the next loop */
887     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
888     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
889     for (r = 0; r < len; ++r) {
890       const PetscInt row = lrows[r] + A->rmap->rstart;
891       if (row >= A->cmap->N) continue;
892       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
893     }
894     aijA->nonew = nnwA;
895     aijB->nonew = nnwB;
896   } else {
897     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
898     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
899   }
900   PetscCall(PetscFree(lrows));
901   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
902   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
903 
904   /* only change matrix nonzero state if pattern was allowed to be changed */
905   if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) {
906     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
907     PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
908   }
909   PetscFunctionReturn(PETSC_SUCCESS);
910 }
911 
912 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
913 {
914   Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
915   PetscMPIInt        n = A->rmap->n;
916   PetscInt           i, j, r, m, len = 0;
917   PetscInt          *lrows, *owners = A->rmap->range;
918   PetscMPIInt        p = 0;
919   PetscSFNode       *rrows;
920   PetscSF            sf;
921   const PetscScalar *xx;
922   PetscScalar       *bb, *mask, *aij_a;
923   Vec                xmask, lmask;
924   Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
925   const PetscInt    *aj, *ii, *ridx;
926   PetscScalar       *aa;
927 
928   PetscFunctionBegin;
929   /* Create SF where leaves are input rows and roots are owned rows */
930   PetscCall(PetscMalloc1(n, &lrows));
931   for (r = 0; r < n; ++r) lrows[r] = -1;
932   PetscCall(PetscMalloc1(N, &rrows));
933   for (r = 0; r < N; ++r) {
934     const PetscInt idx = rows[r];
935     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
936     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
937       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
938     }
939     rrows[r].rank  = p;
940     rrows[r].index = rows[r] - owners[p];
941   }
942   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
943   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
944   /* Collect flags for rows to be zeroed */
945   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
946   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
947   PetscCall(PetscSFDestroy(&sf));
948   /* Compress and put in row numbers */
949   for (r = 0; r < n; ++r)
950     if (lrows[r] >= 0) lrows[len++] = r;
951   /* zero diagonal part of matrix */
952   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
953   /* handle off-diagonal part of matrix */
954   PetscCall(MatCreateVecs(A, &xmask, NULL));
955   PetscCall(VecDuplicate(l->lvec, &lmask));
956   PetscCall(VecGetArray(xmask, &bb));
957   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
958   PetscCall(VecRestoreArray(xmask, &bb));
959   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
960   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
961   PetscCall(VecDestroy(&xmask));
962   if (x && b) { /* this code is buggy when the row and column layout don't match */
963     PetscBool cong;
964 
965     PetscCall(MatHasCongruentLayouts(A, &cong));
966     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
967     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
968     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
969     PetscCall(VecGetArrayRead(l->lvec, &xx));
970     PetscCall(VecGetArray(b, &bb));
971   }
972   PetscCall(VecGetArray(lmask, &mask));
973   /* remove zeroed rows of off-diagonal matrix */
974   PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
975   ii = aij->i;
976   for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
977   /* loop over all elements of off process part of matrix zeroing removed columns*/
978   if (aij->compressedrow.use) {
979     m    = aij->compressedrow.nrows;
980     ii   = aij->compressedrow.i;
981     ridx = aij->compressedrow.rindex;
982     for (i = 0; i < m; i++) {
983       n  = ii[i + 1] - ii[i];
984       aj = aij->j + ii[i];
985       aa = aij_a + ii[i];
986 
987       for (j = 0; j < n; j++) {
988         if (PetscAbsScalar(mask[*aj])) {
989           if (b) bb[*ridx] -= *aa * xx[*aj];
990           *aa = 0.0;
991         }
992         aa++;
993         aj++;
994       }
995       ridx++;
996     }
997   } else { /* do not use compressed row format */
998     m = l->B->rmap->n;
999     for (i = 0; i < m; i++) {
1000       n  = ii[i + 1] - ii[i];
1001       aj = aij->j + ii[i];
1002       aa = aij_a + ii[i];
1003       for (j = 0; j < n; j++) {
1004         if (PetscAbsScalar(mask[*aj])) {
1005           if (b) bb[i] -= *aa * xx[*aj];
1006           *aa = 0.0;
1007         }
1008         aa++;
1009         aj++;
1010       }
1011     }
1012   }
1013   if (x && b) {
1014     PetscCall(VecRestoreArray(b, &bb));
1015     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1016   }
1017   PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
1018   PetscCall(VecRestoreArray(lmask, &mask));
1019   PetscCall(VecDestroy(&lmask));
1020   PetscCall(PetscFree(lrows));
1021 
1022   /* only change matrix nonzero state if pattern was allowed to be changed */
1023   if (!((Mat_SeqAIJ *)l->A->data)->nonew) {
1024     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1025     PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1026   }
1027   PetscFunctionReturn(PETSC_SUCCESS);
1028 }
1029 
1030 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1031 {
1032   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1033   PetscInt    nt;
1034   VecScatter  Mvctx = a->Mvctx;
1035 
1036   PetscFunctionBegin;
1037   PetscCall(VecGetLocalSize(xx, &nt));
1038   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
1039   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1040   PetscUseTypeMethod(a->A, mult, xx, yy);
1041   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1042   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1043   PetscFunctionReturn(PETSC_SUCCESS);
1044 }
1045 
1046 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1047 {
1048   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1049 
1050   PetscFunctionBegin;
1051   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1052   PetscFunctionReturn(PETSC_SUCCESS);
1053 }
1054 
1055 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1056 {
1057   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1058   VecScatter  Mvctx = a->Mvctx;
1059 
1060   PetscFunctionBegin;
1061   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1062   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1063   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1064   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1065   PetscFunctionReturn(PETSC_SUCCESS);
1066 }
1067 
1068 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1069 {
1070   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1071 
1072   PetscFunctionBegin;
1073   /* do nondiagonal part */
1074   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1075   /* do local part */
1076   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1077   /* add partial results together */
1078   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1079   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1080   PetscFunctionReturn(PETSC_SUCCESS);
1081 }
1082 
1083 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
1084 {
1085   MPI_Comm    comm;
1086   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
1087   Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
1088   IS          Me, Notme;
1089   PetscInt    M, N, first, last, *notme, i;
1090   PetscBool   lf;
1091   PetscMPIInt size;
1092 
1093   PetscFunctionBegin;
1094   /* Easy test: symmetric diagonal block */
1095   PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
1096   PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
1097   if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
1098   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
1099   PetscCallMPI(MPI_Comm_size(comm, &size));
1100   if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);
1101 
1102   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1103   PetscCall(MatGetSize(Amat, &M, &N));
1104   PetscCall(MatGetOwnershipRange(Amat, &first, &last));
1105   PetscCall(PetscMalloc1(N - last + first, &notme));
1106   for (i = 0; i < first; i++) notme[i] = i;
1107   for (i = last; i < M; i++) notme[i - last + first] = i;
1108   PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
1109   PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
1110   PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
1111   Aoff = Aoffs[0];
1112   PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
1113   Boff = Boffs[0];
1114   PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
1115   PetscCall(MatDestroyMatrices(1, &Aoffs));
1116   PetscCall(MatDestroyMatrices(1, &Boffs));
1117   PetscCall(ISDestroy(&Me));
1118   PetscCall(ISDestroy(&Notme));
1119   PetscCall(PetscFree(notme));
1120   PetscFunctionReturn(PETSC_SUCCESS);
1121 }
1122 
1123 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1124 {
1125   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1126 
1127   PetscFunctionBegin;
1128   /* do nondiagonal part */
1129   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1130   /* do local part */
1131   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1132   /* add partial results together */
1133   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1134   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1135   PetscFunctionReturn(PETSC_SUCCESS);
1136 }
1137 
1138 /*
1139   This only works correctly for square matrices where the subblock A->A is the
1140    diagonal block
1141 */
1142 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1143 {
1144   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1145 
1146   PetscFunctionBegin;
1147   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1148   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1149   PetscCall(MatGetDiagonal(a->A, v));
1150   PetscFunctionReturn(PETSC_SUCCESS);
1151 }
1152 
1153 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1154 {
1155   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1156 
1157   PetscFunctionBegin;
1158   PetscCall(MatScale(a->A, aa));
1159   PetscCall(MatScale(a->B, aa));
1160   PetscFunctionReturn(PETSC_SUCCESS);
1161 }
1162 
1163 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1164 {
1165   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1166   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1167   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1168   const PetscInt    *garray = aij->garray;
1169   const PetscScalar *aa, *ba;
1170   PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
1171   PetscInt64         nz, hnz;
1172   PetscInt          *rowlens;
1173   PetscInt          *colidxs;
1174   PetscScalar       *matvals;
1175   PetscMPIInt        rank;
1176 
1177   PetscFunctionBegin;
1178   PetscCall(PetscViewerSetUp(viewer));
1179 
1180   M  = mat->rmap->N;
1181   N  = mat->cmap->N;
1182   m  = mat->rmap->n;
1183   rs = mat->rmap->rstart;
1184   cs = mat->cmap->rstart;
1185   nz = A->nz + B->nz;
1186 
1187   /* write matrix header */
1188   header[0] = MAT_FILE_CLASSID;
1189   header[1] = M;
1190   header[2] = N;
1191   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1192   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1193   if (rank == 0) {
1194     if (hnz > PETSC_MAX_INT) header[3] = PETSC_MAX_INT;
1195     else header[3] = (PetscInt)hnz;
1196   }
1197   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1198 
1199   /* fill in and store row lengths  */
1200   PetscCall(PetscMalloc1(m, &rowlens));
1201   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1202   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1203   PetscCall(PetscFree(rowlens));
1204 
1205   /* fill in and store column indices */
1206   PetscCall(PetscMalloc1(nz, &colidxs));
1207   for (cnt = 0, i = 0; i < m; i++) {
1208     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1209       if (garray[B->j[jb]] > cs) break;
1210       colidxs[cnt++] = garray[B->j[jb]];
1211     }
1212     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1213     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1214   }
1215   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1216   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1217   PetscCall(PetscFree(colidxs));
1218 
1219   /* fill in and store nonzero values */
1220   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1221   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1222   PetscCall(PetscMalloc1(nz, &matvals));
1223   for (cnt = 0, i = 0; i < m; i++) {
1224     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1225       if (garray[B->j[jb]] > cs) break;
1226       matvals[cnt++] = ba[jb];
1227     }
1228     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1229     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1230   }
1231   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1232   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1233   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1234   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1235   PetscCall(PetscFree(matvals));
1236 
1237   /* write block size option to the viewer's .info file */
1238   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1239   PetscFunctionReturn(PETSC_SUCCESS);
1240 }
1241 
1242 #include <petscdraw.h>
1243 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1244 {
1245   Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
1246   PetscMPIInt       rank = aij->rank, size = aij->size;
1247   PetscBool         isdraw, iascii, isbinary;
1248   PetscViewer       sviewer;
1249   PetscViewerFormat format;
1250 
1251   PetscFunctionBegin;
1252   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1253   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1254   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1255   if (iascii) {
1256     PetscCall(PetscViewerGetFormat(viewer, &format));
1257     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1258       PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz;
1259       PetscCall(PetscMalloc1(size, &nz));
1260       PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
1261       for (i = 0; i < (PetscInt)size; i++) {
1262         nmax = PetscMax(nmax, nz[i]);
1263         nmin = PetscMin(nmin, nz[i]);
1264         navg += nz[i];
1265       }
1266       PetscCall(PetscFree(nz));
1267       navg = navg / size;
1268       PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
1269       PetscFunctionReturn(PETSC_SUCCESS);
1270     }
1271     PetscCall(PetscViewerGetFormat(viewer, &format));
1272     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1273       MatInfo   info;
1274       PetscInt *inodes = NULL;
1275 
1276       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1277       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
1278       PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
1279       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1280       if (!inodes) {
1281         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1282                                                      (double)info.memory));
1283       } else {
1284         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1285                                                      (double)info.memory));
1286       }
1287       PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
1288       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1289       PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
1290       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1291       PetscCall(PetscViewerFlush(viewer));
1292       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1293       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
1294       PetscCall(VecScatterView(aij->Mvctx, viewer));
1295       PetscFunctionReturn(PETSC_SUCCESS);
1296     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1297       PetscInt inodecount, inodelimit, *inodes;
1298       PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
1299       if (inodes) {
1300         PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
1301       } else {
1302         PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
1303       }
1304       PetscFunctionReturn(PETSC_SUCCESS);
1305     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1306       PetscFunctionReturn(PETSC_SUCCESS);
1307     }
1308   } else if (isbinary) {
1309     if (size == 1) {
1310       PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1311       PetscCall(MatView(aij->A, viewer));
1312     } else {
1313       PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
1314     }
1315     PetscFunctionReturn(PETSC_SUCCESS);
1316   } else if (iascii && size == 1) {
1317     PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1318     PetscCall(MatView(aij->A, viewer));
1319     PetscFunctionReturn(PETSC_SUCCESS);
1320   } else if (isdraw) {
1321     PetscDraw draw;
1322     PetscBool isnull;
1323     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1324     PetscCall(PetscDrawIsNull(draw, &isnull));
1325     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1326   }
1327 
1328   { /* assemble the entire matrix onto first processor */
1329     Mat A = NULL, Av;
1330     IS  isrow, iscol;
1331 
1332     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
1333     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
1334     PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
1335     PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
1336     /*  The commented code uses MatCreateSubMatrices instead */
1337     /*
1338     Mat *AA, A = NULL, Av;
1339     IS  isrow,iscol;
1340 
1341     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
1342     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
1343     PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
1344     if (rank == 0) {
1345        PetscCall(PetscObjectReference((PetscObject)AA[0]));
1346        A    = AA[0];
1347        Av   = AA[0];
1348     }
1349     PetscCall(MatDestroySubMatrices(1,&AA));
1350 */
1351     PetscCall(ISDestroy(&iscol));
1352     PetscCall(ISDestroy(&isrow));
1353     /*
1354        Everyone has to call to draw the matrix since the graphics waits are
1355        synchronized across all processors that share the PetscDraw object
1356     */
1357     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1358     if (rank == 0) {
1359       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
1360       PetscCall(MatView_SeqAIJ(Av, sviewer));
1361     }
1362     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1363     PetscCall(MatDestroy(&A));
1364   }
1365   PetscFunctionReturn(PETSC_SUCCESS);
1366 }
1367 
1368 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1369 {
1370   PetscBool iascii, isdraw, issocket, isbinary;
1371 
1372   PetscFunctionBegin;
1373   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1374   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1375   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1376   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1377   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1378   PetscFunctionReturn(PETSC_SUCCESS);
1379 }
1380 
1381 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
1382 {
1383   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
1384   Vec         bb1 = NULL;
1385   PetscBool   hasop;
1386 
1387   PetscFunctionBegin;
1388   if (flag == SOR_APPLY_UPPER) {
1389     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1390     PetscFunctionReturn(PETSC_SUCCESS);
1391   }
1392 
1393   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));
1394 
1395   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1396     if (flag & SOR_ZERO_INITIAL_GUESS) {
1397       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1398       its--;
1399     }
1400 
1401     while (its--) {
1402       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1403       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1404 
1405       /* update rhs: bb1 = bb - B*x */
1406       PetscCall(VecScale(mat->lvec, -1.0));
1407       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1408 
1409       /* local sweep */
1410       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
1411     }
1412   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1413     if (flag & SOR_ZERO_INITIAL_GUESS) {
1414       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1415       its--;
1416     }
1417     while (its--) {
1418       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1419       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1420 
1421       /* update rhs: bb1 = bb - B*x */
1422       PetscCall(VecScale(mat->lvec, -1.0));
1423       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1424 
1425       /* local sweep */
1426       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
1427     }
1428   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1429     if (flag & SOR_ZERO_INITIAL_GUESS) {
1430       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1431       its--;
1432     }
1433     while (its--) {
1434       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1435       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1436 
1437       /* update rhs: bb1 = bb - B*x */
1438       PetscCall(VecScale(mat->lvec, -1.0));
1439       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1440 
1441       /* local sweep */
1442       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
1443     }
1444   } else if (flag & SOR_EISENSTAT) {
1445     Vec xx1;
1446 
1447     PetscCall(VecDuplicate(bb, &xx1));
1448     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
1449 
1450     PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1451     PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1452     if (!mat->diag) {
1453       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
1454       PetscCall(MatGetDiagonal(matin, mat->diag));
1455     }
1456     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
1457     if (hasop) {
1458       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
1459     } else {
1460       PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
1461     }
1462     PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));
1463 
1464     PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));
1465 
1466     /* local sweep */
1467     PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
1468     PetscCall(VecAXPY(xx, 1.0, xx1));
1469     PetscCall(VecDestroy(&xx1));
1470   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");
1471 
1472   PetscCall(VecDestroy(&bb1));
1473 
1474   matin->factorerrortype = mat->A->factorerrortype;
1475   PetscFunctionReturn(PETSC_SUCCESS);
1476 }
1477 
1478 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
1479 {
1480   Mat             aA, aB, Aperm;
1481   const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
1482   PetscScalar    *aa, *ba;
1483   PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
1484   PetscSF         rowsf, sf;
1485   IS              parcolp = NULL;
1486   PetscBool       done;
1487 
1488   PetscFunctionBegin;
1489   PetscCall(MatGetLocalSize(A, &m, &n));
1490   PetscCall(ISGetIndices(rowp, &rwant));
1491   PetscCall(ISGetIndices(colp, &cwant));
1492   PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));
1493 
1494   /* Invert row permutation to find out where my rows should go */
1495   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
1496   PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
1497   PetscCall(PetscSFSetFromOptions(rowsf));
1498   for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
1499   PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1500   PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1501 
1502   /* Invert column permutation to find out where my columns should go */
1503   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1504   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
1505   PetscCall(PetscSFSetFromOptions(sf));
1506   for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
1507   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1508   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1509   PetscCall(PetscSFDestroy(&sf));
1510 
1511   PetscCall(ISRestoreIndices(rowp, &rwant));
1512   PetscCall(ISRestoreIndices(colp, &cwant));
1513   PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));
1514 
1515   /* Find out where my gcols should go */
1516   PetscCall(MatGetSize(aB, NULL, &ng));
1517   PetscCall(PetscMalloc1(ng, &gcdest));
1518   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1519   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
1520   PetscCall(PetscSFSetFromOptions(sf));
1521   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1522   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1523   PetscCall(PetscSFDestroy(&sf));
1524 
1525   PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
1526   PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1527   PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1528   for (i = 0; i < m; i++) {
1529     PetscInt    row = rdest[i];
1530     PetscMPIInt rowner;
1531     PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
1532     for (j = ai[i]; j < ai[i + 1]; j++) {
1533       PetscInt    col = cdest[aj[j]];
1534       PetscMPIInt cowner;
1535       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
1536       if (rowner == cowner) dnnz[i]++;
1537       else onnz[i]++;
1538     }
1539     for (j = bi[i]; j < bi[i + 1]; j++) {
1540       PetscInt    col = gcdest[bj[j]];
1541       PetscMPIInt cowner;
1542       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
1543       if (rowner == cowner) dnnz[i]++;
1544       else onnz[i]++;
1545     }
1546   }
1547   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1548   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1549   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1550   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1551   PetscCall(PetscSFDestroy(&rowsf));
1552 
1553   PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
1554   PetscCall(MatSeqAIJGetArray(aA, &aa));
1555   PetscCall(MatSeqAIJGetArray(aB, &ba));
1556   for (i = 0; i < m; i++) {
1557     PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
1558     PetscInt  j0, rowlen;
1559     rowlen = ai[i + 1] - ai[i];
1560     for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
1561       for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
1562       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
1563     }
1564     rowlen = bi[i + 1] - bi[i];
1565     for (j0 = j = 0; j < rowlen; j0 = j) {
1566       for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
1567       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
1568     }
1569   }
1570   PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
1571   PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
1572   PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1573   PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1574   PetscCall(MatSeqAIJRestoreArray(aA, &aa));
1575   PetscCall(MatSeqAIJRestoreArray(aB, &ba));
1576   PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
1577   PetscCall(PetscFree3(work, rdest, cdest));
1578   PetscCall(PetscFree(gcdest));
1579   if (parcolp) PetscCall(ISDestroy(&colp));
1580   *B = Aperm;
1581   PetscFunctionReturn(PETSC_SUCCESS);
1582 }
1583 
1584 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1585 {
1586   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1587 
1588   PetscFunctionBegin;
1589   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1590   if (ghosts) *ghosts = aij->garray;
1591   PetscFunctionReturn(PETSC_SUCCESS);
1592 }
1593 
1594 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1595 {
1596   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1597   Mat            A = mat->A, B = mat->B;
1598   PetscLogDouble isend[5], irecv[5];
1599 
1600   PetscFunctionBegin;
1601   info->block_size = 1.0;
1602   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1603 
1604   isend[0] = info->nz_used;
1605   isend[1] = info->nz_allocated;
1606   isend[2] = info->nz_unneeded;
1607   isend[3] = info->memory;
1608   isend[4] = info->mallocs;
1609 
1610   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1611 
1612   isend[0] += info->nz_used;
1613   isend[1] += info->nz_allocated;
1614   isend[2] += info->nz_unneeded;
1615   isend[3] += info->memory;
1616   isend[4] += info->mallocs;
1617   if (flag == MAT_LOCAL) {
1618     info->nz_used      = isend[0];
1619     info->nz_allocated = isend[1];
1620     info->nz_unneeded  = isend[2];
1621     info->memory       = isend[3];
1622     info->mallocs      = isend[4];
1623   } else if (flag == MAT_GLOBAL_MAX) {
1624     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1625 
1626     info->nz_used      = irecv[0];
1627     info->nz_allocated = irecv[1];
1628     info->nz_unneeded  = irecv[2];
1629     info->memory       = irecv[3];
1630     info->mallocs      = irecv[4];
1631   } else if (flag == MAT_GLOBAL_SUM) {
1632     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1633 
1634     info->nz_used      = irecv[0];
1635     info->nz_allocated = irecv[1];
1636     info->nz_unneeded  = irecv[2];
1637     info->memory       = irecv[3];
1638     info->mallocs      = irecv[4];
1639   }
1640   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1641   info->fill_ratio_needed = 0;
1642   info->factor_mallocs    = 0;
1643   PetscFunctionReturn(PETSC_SUCCESS);
1644 }
1645 
1646 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
1647 {
1648   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1649 
1650   PetscFunctionBegin;
1651   switch (op) {
1652   case MAT_NEW_NONZERO_LOCATIONS:
1653   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1654   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1655   case MAT_KEEP_NONZERO_PATTERN:
1656   case MAT_NEW_NONZERO_LOCATION_ERR:
1657   case MAT_USE_INODES:
1658   case MAT_IGNORE_ZERO_ENTRIES:
1659   case MAT_FORM_EXPLICIT_TRANSPOSE:
1660     MatCheckPreallocated(A, 1);
1661     PetscCall(MatSetOption(a->A, op, flg));
1662     PetscCall(MatSetOption(a->B, op, flg));
1663     break;
1664   case MAT_ROW_ORIENTED:
1665     MatCheckPreallocated(A, 1);
1666     a->roworiented = flg;
1667 
1668     PetscCall(MatSetOption(a->A, op, flg));
1669     PetscCall(MatSetOption(a->B, op, flg));
1670     break;
1671   case MAT_FORCE_DIAGONAL_ENTRIES:
1672   case MAT_SORTED_FULL:
1673     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1674     break;
1675   case MAT_IGNORE_OFF_PROC_ENTRIES:
1676     a->donotstash = flg;
1677     break;
1678   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1679   case MAT_SPD:
1680   case MAT_SYMMETRIC:
1681   case MAT_STRUCTURALLY_SYMMETRIC:
1682   case MAT_HERMITIAN:
1683   case MAT_SYMMETRY_ETERNAL:
1684   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1685   case MAT_SPD_ETERNAL:
1686     /* if the diagonal matrix is square it inherits some of the properties above */
1687     break;
1688   case MAT_SUBMAT_SINGLEIS:
1689     A->submat_singleis = flg;
1690     break;
1691   case MAT_STRUCTURE_ONLY:
1692     /* The option is handled directly by MatSetOption() */
1693     break;
1694   default:
1695     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1696   }
1697   PetscFunctionReturn(PETSC_SUCCESS);
1698 }
1699 
1700 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1701 {
1702   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
1703   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1704   PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
1705   PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
1706   PetscInt    *cmap, *idx_p;
1707 
1708   PetscFunctionBegin;
1709   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1710   mat->getrowactive = PETSC_TRUE;
1711 
1712   if (!mat->rowvalues && (idx || v)) {
1713     /*
1714         allocate enough space to hold information from the longest row.
1715     */
1716     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
1717     PetscInt    max = 1, tmp;
1718     for (i = 0; i < matin->rmap->n; i++) {
1719       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1720       if (max < tmp) max = tmp;
1721     }
1722     PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
1723   }
1724 
1725   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1726   lrow = row - rstart;
1727 
1728   pvA = &vworkA;
1729   pcA = &cworkA;
1730   pvB = &vworkB;
1731   pcB = &cworkB;
1732   if (!v) {
1733     pvA = NULL;
1734     pvB = NULL;
1735   }
1736   if (!idx) {
1737     pcA = NULL;
1738     if (!v) pcB = NULL;
1739   }
1740   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1741   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1742   nztot = nzA + nzB;
1743 
1744   cmap = mat->garray;
1745   if (v || idx) {
1746     if (nztot) {
1747       /* Sort by increasing column numbers, assuming A and B already sorted */
1748       PetscInt imark = -1;
1749       if (v) {
1750         *v = v_p = mat->rowvalues;
1751         for (i = 0; i < nzB; i++) {
1752           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1753           else break;
1754         }
1755         imark = i;
1756         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1757         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1758       }
1759       if (idx) {
1760         *idx = idx_p = mat->rowindices;
1761         if (imark > -1) {
1762           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
1763         } else {
1764           for (i = 0; i < nzB; i++) {
1765             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1766             else break;
1767           }
1768           imark = i;
1769         }
1770         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
1771         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
1772       }
1773     } else {
1774       if (idx) *idx = NULL;
1775       if (v) *v = NULL;
1776     }
1777   }
1778   *nz = nztot;
1779   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1780   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1781   PetscFunctionReturn(PETSC_SUCCESS);
1782 }
1783 
1784 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1785 {
1786   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1787 
1788   PetscFunctionBegin;
1789   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1790   aij->getrowactive = PETSC_FALSE;
1791   PetscFunctionReturn(PETSC_SUCCESS);
1792 }
1793 
1794 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
1795 {
1796   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
1797   Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
1798   PetscInt         i, j, cstart = mat->cmap->rstart;
1799   PetscReal        sum = 0.0;
1800   const MatScalar *v, *amata, *bmata;
1801 
1802   PetscFunctionBegin;
1803   if (aij->size == 1) {
1804     PetscCall(MatNorm(aij->A, type, norm));
1805   } else {
1806     PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
1807     PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
1808     if (type == NORM_FROBENIUS) {
1809       v = amata;
1810       for (i = 0; i < amat->nz; i++) {
1811         sum += PetscRealPart(PetscConj(*v) * (*v));
1812         v++;
1813       }
1814       v = bmata;
1815       for (i = 0; i < bmat->nz; i++) {
1816         sum += PetscRealPart(PetscConj(*v) * (*v));
1817         v++;
1818       }
1819       PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1820       *norm = PetscSqrtReal(*norm);
1821       PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
1822     } else if (type == NORM_1) { /* max column norm */
1823       PetscReal *tmp, *tmp2;
1824       PetscInt  *jj, *garray = aij->garray;
1825       PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
1826       PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
1827       *norm = 0.0;
1828       v     = amata;
1829       jj    = amat->j;
1830       for (j = 0; j < amat->nz; j++) {
1831         tmp[cstart + *jj++] += PetscAbsScalar(*v);
1832         v++;
1833       }
1834       v  = bmata;
1835       jj = bmat->j;
1836       for (j = 0; j < bmat->nz; j++) {
1837         tmp[garray[*jj++]] += PetscAbsScalar(*v);
1838         v++;
1839       }
1840       PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1841       for (j = 0; j < mat->cmap->N; j++) {
1842         if (tmp2[j] > *norm) *norm = tmp2[j];
1843       }
1844       PetscCall(PetscFree(tmp));
1845       PetscCall(PetscFree(tmp2));
1846       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1847     } else if (type == NORM_INFINITY) { /* max row norm */
1848       PetscReal ntemp = 0.0;
1849       for (j = 0; j < aij->A->rmap->n; j++) {
1850         v   = PetscSafePointerPlusOffset(amata, amat->i[j]);
1851         sum = 0.0;
1852         for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
1853           sum += PetscAbsScalar(*v);
1854           v++;
1855         }
1856         v = PetscSafePointerPlusOffset(bmata, bmat->i[j]);
1857         for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
1858           sum += PetscAbsScalar(*v);
1859           v++;
1860         }
1861         if (sum > ntemp) ntemp = sum;
1862       }
1863       PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
1864       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1865     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
1866     PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
1867     PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
1868   }
1869   PetscFunctionReturn(PETSC_SUCCESS);
1870 }
1871 
1872 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
1873 {
1874   Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
1875   Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
1876   PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
1877   const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
1878   Mat              B, A_diag, *B_diag;
1879   const MatScalar *pbv, *bv;
1880 
1881   PetscFunctionBegin;
1882   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1883   ma = A->rmap->n;
1884   na = A->cmap->n;
1885   mb = a->B->rmap->n;
1886   nb = a->B->cmap->n;
1887   ai = Aloc->i;
1888   aj = Aloc->j;
1889   bi = Bloc->i;
1890   bj = Bloc->j;
1891   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1892     PetscInt            *d_nnz, *g_nnz, *o_nnz;
1893     PetscSFNode         *oloc;
1894     PETSC_UNUSED PetscSF sf;
1895 
1896     PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
1897     /* compute d_nnz for preallocation */
1898     PetscCall(PetscArrayzero(d_nnz, na));
1899     for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
1900     /* compute local off-diagonal contributions */
1901     PetscCall(PetscArrayzero(g_nnz, nb));
1902     for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
1903     /* map those to global */
1904     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1905     PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
1906     PetscCall(PetscSFSetFromOptions(sf));
1907     PetscCall(PetscArrayzero(o_nnz, na));
1908     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1909     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1910     PetscCall(PetscSFDestroy(&sf));
1911 
1912     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1913     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1914     PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
1915     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1916     PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
1917     PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
1918   } else {
1919     B = *matout;
1920     PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
1921   }
1922 
1923   b           = (Mat_MPIAIJ *)B->data;
1924   A_diag      = a->A;
1925   B_diag      = &b->A;
1926   sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
1927   A_diag_ncol = A_diag->cmap->N;
1928   B_diag_ilen = sub_B_diag->ilen;
1929   B_diag_i    = sub_B_diag->i;
1930 
1931   /* Set ilen for diagonal of B */
1932   for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];
1933 
1934   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1935   very quickly (=without using MatSetValues), because all writes are local. */
1936   PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
1937   PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));
1938 
1939   /* copy over the B part */
1940   PetscCall(PetscMalloc1(bi[mb], &cols));
1941   PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
1942   pbv = bv;
1943   row = A->rmap->rstart;
1944   for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
1945   cols_tmp = cols;
1946   for (i = 0; i < mb; i++) {
1947     ncol = bi[i + 1] - bi[i];
1948     PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
1949     row++;
1950     if (pbv) pbv += ncol;
1951     if (cols_tmp) cols_tmp += ncol;
1952   }
1953   PetscCall(PetscFree(cols));
1954   PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));
1955 
1956   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1957   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1958   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1959     *matout = B;
1960   } else {
1961     PetscCall(MatHeaderMerge(A, &B));
1962   }
1963   PetscFunctionReturn(PETSC_SUCCESS);
1964 }
1965 
1966 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
1967 {
1968   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1969   Mat         a = aij->A, b = aij->B;
1970   PetscInt    s1, s2, s3;
1971 
1972   PetscFunctionBegin;
1973   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1974   if (rr) {
1975     PetscCall(VecGetLocalSize(rr, &s1));
1976     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1977     /* Overlap communication with computation. */
1978     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1979   }
1980   if (ll) {
1981     PetscCall(VecGetLocalSize(ll, &s1));
1982     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1983     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1984   }
1985   /* scale  the diagonal block */
1986   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1987 
1988   if (rr) {
1989     /* Do a scatter end and then right scale the off-diagonal block */
1990     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1991     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
1992   }
1993   PetscFunctionReturn(PETSC_SUCCESS);
1994 }
1995 
1996 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1997 {
1998   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1999 
2000   PetscFunctionBegin;
2001   PetscCall(MatSetUnfactored(a->A));
2002   PetscFunctionReturn(PETSC_SUCCESS);
2003 }
2004 
2005 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2006 {
2007   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2008   Mat         a, b, c, d;
2009   PetscBool   flg;
2010 
2011   PetscFunctionBegin;
2012   a = matA->A;
2013   b = matA->B;
2014   c = matB->A;
2015   d = matB->B;
2016 
2017   PetscCall(MatEqual(a, c, &flg));
2018   if (flg) PetscCall(MatEqual(b, d, &flg));
2019   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2020   PetscFunctionReturn(PETSC_SUCCESS);
2021 }
2022 
2023 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2024 {
2025   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2026   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2027 
2028   PetscFunctionBegin;
2029   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2030   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2031     /* because of the column compression in the off-processor part of the matrix a->B,
2032        the number of columns in a->B and b->B may be different, hence we cannot call
2033        the MatCopy() directly on the two parts. If need be, we can provide a more
2034        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2035        then copying the submatrices */
2036     PetscCall(MatCopy_Basic(A, B, str));
2037   } else {
2038     PetscCall(MatCopy(a->A, b->A, str));
2039     PetscCall(MatCopy(a->B, b->B, str));
2040   }
2041   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2042   PetscFunctionReturn(PETSC_SUCCESS);
2043 }
2044 
2045 /*
2046    Computes the number of nonzeros per row needed for preallocation when X and Y
2047    have different nonzero structure.
2048 */
2049 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2050 {
2051   PetscInt i, j, k, nzx, nzy;
2052 
2053   PetscFunctionBegin;
2054   /* Set the number of nonzeros in the new matrix */
2055   for (i = 0; i < m; i++) {
2056     const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]);
2057     nzx    = xi[i + 1] - xi[i];
2058     nzy    = yi[i + 1] - yi[i];
2059     nnz[i] = 0;
2060     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2061       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2062       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2063       nnz[i]++;
2064     }
2065     for (; k < nzy; k++) nnz[i]++;
2066   }
2067   PetscFunctionReturn(PETSC_SUCCESS);
2068 }
2069 
2070 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2071 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2072 {
2073   PetscInt    m = Y->rmap->N;
2074   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2075   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2076 
2077   PetscFunctionBegin;
2078   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2079   PetscFunctionReturn(PETSC_SUCCESS);
2080 }
2081 
2082 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2083 {
2084   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;
2085 
2086   PetscFunctionBegin;
2087   if (str == SAME_NONZERO_PATTERN) {
2088     PetscCall(MatAXPY(yy->A, a, xx->A, str));
2089     PetscCall(MatAXPY(yy->B, a, xx->B, str));
2090   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2091     PetscCall(MatAXPY_Basic(Y, a, X, str));
2092   } else {
2093     Mat       B;
2094     PetscInt *nnz_d, *nnz_o;
2095 
2096     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
2097     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
2098     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2099     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2100     PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
2101     PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
2102     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
2103     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
2104     PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
2105     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2106     PetscCall(MatHeaderMerge(Y, &B));
2107     PetscCall(PetscFree(nnz_d));
2108     PetscCall(PetscFree(nnz_o));
2109   }
2110   PetscFunctionReturn(PETSC_SUCCESS);
2111 }
2112 
2113 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2114 
2115 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2116 {
2117   PetscFunctionBegin;
2118   if (PetscDefined(USE_COMPLEX)) {
2119     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2120 
2121     PetscCall(MatConjugate_SeqAIJ(aij->A));
2122     PetscCall(MatConjugate_SeqAIJ(aij->B));
2123   }
2124   PetscFunctionReturn(PETSC_SUCCESS);
2125 }
2126 
2127 static PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2128 {
2129   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2130 
2131   PetscFunctionBegin;
2132   PetscCall(MatRealPart(a->A));
2133   PetscCall(MatRealPart(a->B));
2134   PetscFunctionReturn(PETSC_SUCCESS);
2135 }
2136 
2137 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2138 {
2139   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2140 
2141   PetscFunctionBegin;
2142   PetscCall(MatImaginaryPart(a->A));
2143   PetscCall(MatImaginaryPart(a->B));
2144   PetscFunctionReturn(PETSC_SUCCESS);
2145 }
2146 
2147 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2148 {
2149   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2150   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2151   PetscScalar       *va, *vv;
2152   Vec                vB, vA;
2153   const PetscScalar *vb;
2154 
2155   PetscFunctionBegin;
2156   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
2157   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2158 
2159   PetscCall(VecGetArrayWrite(vA, &va));
2160   if (idx) {
2161     for (i = 0; i < m; i++) {
2162       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2163     }
2164   }
2165 
2166   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
2167   PetscCall(PetscMalloc1(m, &idxb));
2168   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2169 
2170   PetscCall(VecGetArrayWrite(v, &vv));
2171   PetscCall(VecGetArrayRead(vB, &vb));
2172   for (i = 0; i < m; i++) {
2173     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2174       vv[i] = vb[i];
2175       if (idx) idx[i] = a->garray[idxb[i]];
2176     } else {
2177       vv[i] = va[i];
2178       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2179     }
2180   }
2181   PetscCall(VecRestoreArrayWrite(vA, &vv));
2182   PetscCall(VecRestoreArrayWrite(vA, &va));
2183   PetscCall(VecRestoreArrayRead(vB, &vb));
2184   PetscCall(PetscFree(idxb));
2185   PetscCall(VecDestroy(&vA));
2186   PetscCall(VecDestroy(&vB));
2187   PetscFunctionReturn(PETSC_SUCCESS);
2188 }
2189 
2190 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v)
2191 {
2192   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2193   PetscInt    m = A->rmap->n;
2194   Vec         vB, vA;
2195 
2196   PetscFunctionBegin;
2197   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
2198   PetscCall(MatGetRowSumAbs(a->A, vA));
2199   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
2200   PetscCall(MatGetRowSumAbs(a->B, vB));
2201   PetscCall(VecAXPY(vA, 1.0, vB));
2202   PetscCall(VecDestroy(&vB));
2203   PetscCall(VecCopy(vA, v));
2204   PetscCall(VecDestroy(&vA));
2205   PetscFunctionReturn(PETSC_SUCCESS);
2206 }
2207 
2208 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2209 {
2210   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2211   PetscInt           m = A->rmap->n, n = A->cmap->n;
2212   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2213   PetscInt          *cmap = mat->garray;
2214   PetscInt          *diagIdx, *offdiagIdx;
2215   Vec                diagV, offdiagV;
2216   PetscScalar       *a, *diagA, *offdiagA;
2217   const PetscScalar *ba, *bav;
2218   PetscInt           r, j, col, ncols, *bi, *bj;
2219   Mat                B = mat->B;
2220   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2221 
2222   PetscFunctionBegin;
2223   /* When a process holds entire A and other processes have no entry */
2224   if (A->cmap->N == n) {
2225     PetscCall(VecGetArrayWrite(v, &diagA));
2226     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2227     PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
2228     PetscCall(VecDestroy(&diagV));
2229     PetscCall(VecRestoreArrayWrite(v, &diagA));
2230     PetscFunctionReturn(PETSC_SUCCESS);
2231   } else if (n == 0) {
2232     if (m) {
2233       PetscCall(VecGetArrayWrite(v, &a));
2234       for (r = 0; r < m; r++) {
2235         a[r] = 0.0;
2236         if (idx) idx[r] = -1;
2237       }
2238       PetscCall(VecRestoreArrayWrite(v, &a));
2239     }
2240     PetscFunctionReturn(PETSC_SUCCESS);
2241   }
2242 
2243   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2244   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2245   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2246   PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));
2247 
2248   /* Get offdiagIdx[] for implicit 0.0 */
2249   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2250   ba = bav;
2251   bi = b->i;
2252   bj = b->j;
2253   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2254   for (r = 0; r < m; r++) {
2255     ncols = bi[r + 1] - bi[r];
2256     if (ncols == A->cmap->N - n) { /* Brow is dense */
2257       offdiagA[r]   = *ba;
2258       offdiagIdx[r] = cmap[0];
2259     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2260       offdiagA[r] = 0.0;
2261 
2262       /* Find first hole in the cmap */
2263       for (j = 0; j < ncols; j++) {
2264         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2265         if (col > j && j < cstart) {
2266           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2267           break;
2268         } else if (col > j + n && j >= cstart) {
2269           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2270           break;
2271         }
2272       }
2273       if (j == ncols && ncols < A->cmap->N - n) {
2274         /* a hole is outside compressed Bcols */
2275         if (ncols == 0) {
2276           if (cstart) {
2277             offdiagIdx[r] = 0;
2278           } else offdiagIdx[r] = cend;
2279         } else { /* ncols > 0 */
2280           offdiagIdx[r] = cmap[ncols - 1] + 1;
2281           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2282         }
2283       }
2284     }
2285 
2286     for (j = 0; j < ncols; j++) {
2287       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
2288         offdiagA[r]   = *ba;
2289         offdiagIdx[r] = cmap[*bj];
2290       }
2291       ba++;
2292       bj++;
2293     }
2294   }
2295 
2296   PetscCall(VecGetArrayWrite(v, &a));
2297   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2298   for (r = 0; r < m; ++r) {
2299     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2300       a[r] = diagA[r];
2301       if (idx) idx[r] = cstart + diagIdx[r];
2302     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2303       a[r] = diagA[r];
2304       if (idx) {
2305         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2306           idx[r] = cstart + diagIdx[r];
2307         } else idx[r] = offdiagIdx[r];
2308       }
2309     } else {
2310       a[r] = offdiagA[r];
2311       if (idx) idx[r] = offdiagIdx[r];
2312     }
2313   }
2314   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2315   PetscCall(VecRestoreArrayWrite(v, &a));
2316   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2317   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2318   PetscCall(VecDestroy(&diagV));
2319   PetscCall(VecDestroy(&offdiagV));
2320   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2321   PetscFunctionReturn(PETSC_SUCCESS);
2322 }
2323 
2324 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2325 {
2326   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2327   PetscInt           m = A->rmap->n, n = A->cmap->n;
2328   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2329   PetscInt          *cmap = mat->garray;
2330   PetscInt          *diagIdx, *offdiagIdx;
2331   Vec                diagV, offdiagV;
2332   PetscScalar       *a, *diagA, *offdiagA;
2333   const PetscScalar *ba, *bav;
2334   PetscInt           r, j, col, ncols, *bi, *bj;
2335   Mat                B = mat->B;
2336   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2337 
2338   PetscFunctionBegin;
2339   /* When a process holds entire A and other processes have no entry */
2340   if (A->cmap->N == n) {
2341     PetscCall(VecGetArrayWrite(v, &diagA));
2342     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2343     PetscCall(MatGetRowMin(mat->A, diagV, idx));
2344     PetscCall(VecDestroy(&diagV));
2345     PetscCall(VecRestoreArrayWrite(v, &diagA));
2346     PetscFunctionReturn(PETSC_SUCCESS);
2347   } else if (n == 0) {
2348     if (m) {
2349       PetscCall(VecGetArrayWrite(v, &a));
2350       for (r = 0; r < m; r++) {
2351         a[r] = PETSC_MAX_REAL;
2352         if (idx) idx[r] = -1;
2353       }
2354       PetscCall(VecRestoreArrayWrite(v, &a));
2355     }
2356     PetscFunctionReturn(PETSC_SUCCESS);
2357   }
2358 
2359   PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
2360   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2361   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2362   PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));
2363 
2364   /* Get offdiagIdx[] for implicit 0.0 */
2365   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2366   ba = bav;
2367   bi = b->i;
2368   bj = b->j;
2369   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2370   for (r = 0; r < m; r++) {
2371     ncols = bi[r + 1] - bi[r];
2372     if (ncols == A->cmap->N - n) { /* Brow is dense */
2373       offdiagA[r]   = *ba;
2374       offdiagIdx[r] = cmap[0];
2375     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2376       offdiagA[r] = 0.0;
2377 
2378       /* Find first hole in the cmap */
2379       for (j = 0; j < ncols; j++) {
2380         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2381         if (col > j && j < cstart) {
2382           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2383           break;
2384         } else if (col > j + n && j >= cstart) {
2385           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2386           break;
2387         }
2388       }
2389       if (j == ncols && ncols < A->cmap->N - n) {
2390         /* a hole is outside compressed Bcols */
2391         if (ncols == 0) {
2392           if (cstart) {
2393             offdiagIdx[r] = 0;
2394           } else offdiagIdx[r] = cend;
2395         } else { /* ncols > 0 */
2396           offdiagIdx[r] = cmap[ncols - 1] + 1;
2397           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2398         }
2399       }
2400     }
2401 
2402     for (j = 0; j < ncols; j++) {
2403       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
2404         offdiagA[r]   = *ba;
2405         offdiagIdx[r] = cmap[*bj];
2406       }
2407       ba++;
2408       bj++;
2409     }
2410   }
2411 
2412   PetscCall(VecGetArrayWrite(v, &a));
2413   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2414   for (r = 0; r < m; ++r) {
2415     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2416       a[r] = diagA[r];
2417       if (idx) idx[r] = cstart + diagIdx[r];
2418     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2419       a[r] = diagA[r];
2420       if (idx) {
2421         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2422           idx[r] = cstart + diagIdx[r];
2423         } else idx[r] = offdiagIdx[r];
2424       }
2425     } else {
2426       a[r] = offdiagA[r];
2427       if (idx) idx[r] = offdiagIdx[r];
2428     }
2429   }
2430   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2431   PetscCall(VecRestoreArrayWrite(v, &a));
2432   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2433   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2434   PetscCall(VecDestroy(&diagV));
2435   PetscCall(VecDestroy(&offdiagV));
2436   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2437   PetscFunctionReturn(PETSC_SUCCESS);
2438 }
2439 
2440 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2441 {
2442   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2443   PetscInt           m = A->rmap->n, n = A->cmap->n;
2444   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2445   PetscInt          *cmap = mat->garray;
2446   PetscInt          *diagIdx, *offdiagIdx;
2447   Vec                diagV, offdiagV;
2448   PetscScalar       *a, *diagA, *offdiagA;
2449   const PetscScalar *ba, *bav;
2450   PetscInt           r, j, col, ncols, *bi, *bj;
2451   Mat                B = mat->B;
2452   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2453 
2454   PetscFunctionBegin;
2455   /* When a process holds entire A and other processes have no entry */
2456   if (A->cmap->N == n) {
2457     PetscCall(VecGetArrayWrite(v, &diagA));
2458     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2459     PetscCall(MatGetRowMax(mat->A, diagV, idx));
2460     PetscCall(VecDestroy(&diagV));
2461     PetscCall(VecRestoreArrayWrite(v, &diagA));
2462     PetscFunctionReturn(PETSC_SUCCESS);
2463   } else if (n == 0) {
2464     if (m) {
2465       PetscCall(VecGetArrayWrite(v, &a));
2466       for (r = 0; r < m; r++) {
2467         a[r] = PETSC_MIN_REAL;
2468         if (idx) idx[r] = -1;
2469       }
2470       PetscCall(VecRestoreArrayWrite(v, &a));
2471     }
2472     PetscFunctionReturn(PETSC_SUCCESS);
2473   }
2474 
2475   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2476   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2477   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2478   PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));
2479 
2480   /* Get offdiagIdx[] for implicit 0.0 */
2481   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2482   ba = bav;
2483   bi = b->i;
2484   bj = b->j;
2485   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2486   for (r = 0; r < m; r++) {
2487     ncols = bi[r + 1] - bi[r];
2488     if (ncols == A->cmap->N - n) { /* Brow is dense */
2489       offdiagA[r]   = *ba;
2490       offdiagIdx[r] = cmap[0];
2491     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2492       offdiagA[r] = 0.0;
2493 
2494       /* Find first hole in the cmap */
2495       for (j = 0; j < ncols; j++) {
2496         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2497         if (col > j && j < cstart) {
2498           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2499           break;
2500         } else if (col > j + n && j >= cstart) {
2501           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2502           break;
2503         }
2504       }
2505       if (j == ncols && ncols < A->cmap->N - n) {
2506         /* a hole is outside compressed Bcols */
2507         if (ncols == 0) {
2508           if (cstart) {
2509             offdiagIdx[r] = 0;
2510           } else offdiagIdx[r] = cend;
2511         } else { /* ncols > 0 */
2512           offdiagIdx[r] = cmap[ncols - 1] + 1;
2513           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2514         }
2515       }
2516     }
2517 
2518     for (j = 0; j < ncols; j++) {
2519       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
2520         offdiagA[r]   = *ba;
2521         offdiagIdx[r] = cmap[*bj];
2522       }
2523       ba++;
2524       bj++;
2525     }
2526   }
2527 
2528   PetscCall(VecGetArrayWrite(v, &a));
2529   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2530   for (r = 0; r < m; ++r) {
2531     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2532       a[r] = diagA[r];
2533       if (idx) idx[r] = cstart + diagIdx[r];
2534     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2535       a[r] = diagA[r];
2536       if (idx) {
2537         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2538           idx[r] = cstart + diagIdx[r];
2539         } else idx[r] = offdiagIdx[r];
2540       }
2541     } else {
2542       a[r] = offdiagA[r];
2543       if (idx) idx[r] = offdiagIdx[r];
2544     }
2545   }
2546   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2547   PetscCall(VecRestoreArrayWrite(v, &a));
2548   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2549   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2550   PetscCall(VecDestroy(&diagV));
2551   PetscCall(VecDestroy(&offdiagV));
2552   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2553   PetscFunctionReturn(PETSC_SUCCESS);
2554 }
2555 
2556 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2557 {
2558   Mat *dummy;
2559 
2560   PetscFunctionBegin;
2561   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2562   *newmat = *dummy;
2563   PetscCall(PetscFree(dummy));
2564   PetscFunctionReturn(PETSC_SUCCESS);
2565 }
2566 
2567 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2568 {
2569   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2570 
2571   PetscFunctionBegin;
2572   PetscCall(MatInvertBlockDiagonal(a->A, values));
2573   A->factorerrortype = a->A->factorerrortype;
2574   PetscFunctionReturn(PETSC_SUCCESS);
2575 }
2576 
2577 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2578 {
2579   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2580 
2581   PetscFunctionBegin;
2582   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2583   PetscCall(MatSetRandom(aij->A, rctx));
2584   if (x->assembled) {
2585     PetscCall(MatSetRandom(aij->B, rctx));
2586   } else {
2587     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2588   }
2589   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2590   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2591   PetscFunctionReturn(PETSC_SUCCESS);
2592 }
2593 
2594 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2595 {
2596   PetscFunctionBegin;
2597   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2598   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2599   PetscFunctionReturn(PETSC_SUCCESS);
2600 }
2601 
2602 /*@
2603   MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2604 
2605   Not Collective
2606 
2607   Input Parameter:
2608 . A - the matrix
2609 
2610   Output Parameter:
2611 . nz - the number of nonzeros
2612 
2613   Level: advanced
2614 
2615 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2616 @*/
2617 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2618 {
2619   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2620   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2621   PetscBool   isaij;
2622 
2623   PetscFunctionBegin;
2624   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
2625   PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
2626   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2627   PetscFunctionReturn(PETSC_SUCCESS);
2628 }
2629 
2630 /*@
2631   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2632 
2633   Collective
2634 
2635   Input Parameters:
2636 + A  - the matrix
2637 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)
2638 
2639   Level: advanced
2640 
2641 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2642 @*/
2643 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
2644 {
2645   PetscFunctionBegin;
2646   PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
2647   PetscFunctionReturn(PETSC_SUCCESS);
2648 }
2649 
2650 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
2651 {
2652   PetscBool sc = PETSC_FALSE, flg;
2653 
2654   PetscFunctionBegin;
2655   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2656   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2657   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2658   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2659   PetscOptionsHeadEnd();
2660   PetscFunctionReturn(PETSC_SUCCESS);
2661 }
2662 
2663 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
2664 {
2665   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
2666   Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;
2667 
2668   PetscFunctionBegin;
2669   if (!Y->preallocated) {
2670     PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
2671   } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
2672     PetscInt nonew = aij->nonew;
2673     PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
2674     aij->nonew = nonew;
2675   }
2676   PetscCall(MatShift_Basic(Y, a));
2677   PetscFunctionReturn(PETSC_SUCCESS);
2678 }
2679 
2680 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2681 {
2682   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2683 
2684   PetscFunctionBegin;
2685   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2686   PetscCall(MatMissingDiagonal(a->A, missing, d));
2687   if (d) {
2688     PetscInt rstart;
2689     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2690     *d += rstart;
2691   }
2692   PetscFunctionReturn(PETSC_SUCCESS);
2693 }
2694 
2695 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2696 {
2697   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2698 
2699   PetscFunctionBegin;
2700   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2701   PetscFunctionReturn(PETSC_SUCCESS);
2702 }
2703 
2704 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
2705 {
2706   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2707 
2708   PetscFunctionBegin;
2709   PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2710   PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2711   PetscFunctionReturn(PETSC_SUCCESS);
2712 }
2713 
/*
  MatOps_Values - Positional initializer for the struct _MatOps operation table of MATMPIAIJ.

  Entries are matched to the members of struct _MatOps purely by position, so the order of the
  entries and every NULL placeholder must be preserved when the table is edited. The bracketed
  numeric markers give the zero-based slot number of the entry that follows them.

  NOTE(review): a NULL slot presumably means this table supplies no implementation for that
  operation; confirm against the declaration of struct _MatOps.
*/
2714 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2715                                        MatGetRow_MPIAIJ,
2716                                        MatRestoreRow_MPIAIJ,
2717                                        MatMult_MPIAIJ,
2718                                        /* 4*/ MatMultAdd_MPIAIJ,
2719                                        MatMultTranspose_MPIAIJ,
2720                                        MatMultTransposeAdd_MPIAIJ,
2721                                        NULL,
2722                                        NULL,
2723                                        NULL,
2724                                        /*10*/ NULL,
2725                                        NULL,
2726                                        NULL,
2727                                        MatSOR_MPIAIJ,
2728                                        MatTranspose_MPIAIJ,
2729                                        /*15*/ MatGetInfo_MPIAIJ,
2730                                        MatEqual_MPIAIJ,
2731                                        MatGetDiagonal_MPIAIJ,
2732                                        MatDiagonalScale_MPIAIJ,
2733                                        MatNorm_MPIAIJ,
2734                                        /*20*/ MatAssemblyBegin_MPIAIJ,
2735                                        MatAssemblyEnd_MPIAIJ,
2736                                        MatSetOption_MPIAIJ,
2737                                        MatZeroEntries_MPIAIJ,
2738                                        /*24*/ MatZeroRows_MPIAIJ,
2739                                        NULL,
2740                                        NULL,
2741                                        NULL,
2742                                        NULL,
2743                                        /*29*/ MatSetUp_MPI_Hash,
2744                                        NULL,
2745                                        NULL,
2746                                        MatGetDiagonalBlock_MPIAIJ,
2747                                        NULL,
2748                                        /*34*/ MatDuplicate_MPIAIJ,
2749                                        NULL,
2750                                        NULL,
2751                                        NULL,
2752                                        NULL,
2753                                        /*39*/ MatAXPY_MPIAIJ,
2754                                        MatCreateSubMatrices_MPIAIJ,
2755                                        MatIncreaseOverlap_MPIAIJ,
2756                                        MatGetValues_MPIAIJ,
2757                                        MatCopy_MPIAIJ,
2758                                        /*44*/ MatGetRowMax_MPIAIJ,
2759                                        MatScale_MPIAIJ,
2760                                        MatShift_MPIAIJ,
2761                                        MatDiagonalSet_MPIAIJ,
2762                                        MatZeroRowsColumns_MPIAIJ,
2763                                        /*49*/ MatSetRandom_MPIAIJ,
2764                                        MatGetRowIJ_MPIAIJ,
2765                                        MatRestoreRowIJ_MPIAIJ,
2766                                        NULL,
2767                                        NULL,
2768                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2769                                        NULL,
2770                                        MatSetUnfactored_MPIAIJ,
2771                                        MatPermute_MPIAIJ,
2772                                        NULL,
2773                                        /*59*/ MatCreateSubMatrix_MPIAIJ,
2774                                        MatDestroy_MPIAIJ,
2775                                        MatView_MPIAIJ,
2776                                        NULL,
2777                                        NULL,
2778                                        /*64*/ NULL,
2779                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2780                                        NULL,
2781                                        NULL,
2782                                        NULL,
2783                                        /*69*/ MatGetRowMaxAbs_MPIAIJ,
2784                                        MatGetRowMinAbs_MPIAIJ,
2785                                        NULL,
2786                                        NULL,
2787                                        NULL,
2788                                        NULL,
2789                                        /*75*/ MatFDColoringApply_AIJ,
2790                                        MatSetFromOptions_MPIAIJ,
2791                                        NULL,
2792                                        NULL,
2793                                        MatFindZeroDiagonals_MPIAIJ,
2794                                        /*80*/ NULL,
2795                                        NULL,
2796                                        NULL,
2797                                        /*83*/ MatLoad_MPIAIJ,
2798                                        NULL,
2799                                        NULL,
2800                                        NULL,
2801                                        NULL,
2802                                        NULL,
2803                                        /*89*/ NULL,
2804                                        NULL,
2805                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2806                                        NULL,
2807                                        NULL,
2808                                        /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2809                                        NULL,
2810                                        NULL,
2811                                        NULL,
2812                                        MatBindToCPU_MPIAIJ,
2813                                        /*99*/ MatProductSetFromOptions_MPIAIJ,
2814                                        NULL,
2815                                        NULL,
2816                                        MatConjugate_MPIAIJ,
2817                                        NULL,
2818                                        /*104*/ MatSetValuesRow_MPIAIJ,
2819                                        MatRealPart_MPIAIJ,
2820                                        MatImaginaryPart_MPIAIJ,
2821                                        NULL,
2822                                        NULL,
2823                                        /*109*/ NULL,
2824                                        NULL,
2825                                        MatGetRowMin_MPIAIJ,
2826                                        NULL,
2827                                        MatMissingDiagonal_MPIAIJ,
2828                                        /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
2829                                        NULL,
2830                                        MatGetGhosts_MPIAIJ,
2831                                        NULL,
2832                                        NULL,
2833                                        /*119*/ MatMultDiagonalBlock_MPIAIJ,
2834                                        NULL,
2835                                        NULL,
2836                                        NULL,
2837                                        MatGetMultiProcBlock_MPIAIJ,
2838                                        /*124*/ MatFindNonzeroRows_MPIAIJ,
2839                                        MatGetColumnReductions_MPIAIJ,
2840                                        MatInvertBlockDiagonal_MPIAIJ,
2841                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2842                                        MatCreateSubMatricesMPI_MPIAIJ,
2843                                        /*129*/ NULL,
2844                                        NULL,
2845                                        NULL,
2846                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2847                                        NULL,
2848                                        /*134*/ NULL,
2849                                        NULL,
2850                                        NULL,
2851                                        NULL,
2852                                        NULL,
2853                                        /*139*/ MatSetBlockSizes_MPIAIJ,
2854                                        NULL,
2855                                        NULL,
2856                                        MatFDColoringSetUp_MPIXAIJ,
2857                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2858                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2859                                        /*145*/ NULL,
2860                                        NULL,
2861                                        NULL,
2862                                        MatCreateGraph_Simple_AIJ,
2863                                        NULL,
2864                                        /*150*/ NULL,
2865                                        MatEliminateZeros_MPIAIJ,
2866                                        MatGetRowSumAbs_MPIAIJ};
2867 
2868 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2869 {
2870   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2871 
2872   PetscFunctionBegin;
2873   PetscCall(MatStoreValues(aij->A));
2874   PetscCall(MatStoreValues(aij->B));
2875   PetscFunctionReturn(PETSC_SUCCESS);
2876 }
2877 
2878 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2879 {
2880   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2881 
2882   PetscFunctionBegin;
2883   PetscCall(MatRetrieveValues(aij->A));
2884   PetscCall(MatRetrieveValues(aij->B));
2885   PetscFunctionReturn(PETSC_SUCCESS);
2886 }
2887 
2888 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2889 {
2890   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2891   PetscMPIInt size;
2892 
2893   PetscFunctionBegin;
2894   if (B->hash_active) {
2895     B->ops[0]      = b->cops;
2896     B->hash_active = PETSC_FALSE;
2897   }
2898   PetscCall(PetscLayoutSetUp(B->rmap));
2899   PetscCall(PetscLayoutSetUp(B->cmap));
2900 
2901 #if defined(PETSC_USE_CTABLE)
2902   PetscCall(PetscHMapIDestroy(&b->colmap));
2903 #else
2904   PetscCall(PetscFree(b->colmap));
2905 #endif
2906   PetscCall(PetscFree(b->garray));
2907   PetscCall(VecDestroy(&b->lvec));
2908   PetscCall(VecScatterDestroy(&b->Mvctx));
2909 
2910   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2911 
2912   MatSeqXAIJGetOptions_Private(b->B);
2913   PetscCall(MatDestroy(&b->B));
2914   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2915   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2916   PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
2917   PetscCall(MatSetType(b->B, MATSEQAIJ));
2918   MatSeqXAIJRestoreOptions_Private(b->B);
2919 
2920   MatSeqXAIJGetOptions_Private(b->A);
2921   PetscCall(MatDestroy(&b->A));
2922   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2923   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2924   PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
2925   PetscCall(MatSetType(b->A, MATSEQAIJ));
2926   MatSeqXAIJRestoreOptions_Private(b->A);
2927 
2928   PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
2929   PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
2930   B->preallocated  = PETSC_TRUE;
2931   B->was_assembled = PETSC_FALSE;
2932   B->assembled     = PETSC_FALSE;
2933   PetscFunctionReturn(PETSC_SUCCESS);
2934 }
2935 
2936 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2937 {
2938   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2939 
2940   PetscFunctionBegin;
2941   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2942   PetscCall(PetscLayoutSetUp(B->rmap));
2943   PetscCall(PetscLayoutSetUp(B->cmap));
2944 
2945 #if defined(PETSC_USE_CTABLE)
2946   PetscCall(PetscHMapIDestroy(&b->colmap));
2947 #else
2948   PetscCall(PetscFree(b->colmap));
2949 #endif
2950   PetscCall(PetscFree(b->garray));
2951   PetscCall(VecDestroy(&b->lvec));
2952   PetscCall(VecScatterDestroy(&b->Mvctx));
2953 
2954   PetscCall(MatResetPreallocation(b->A));
2955   PetscCall(MatResetPreallocation(b->B));
2956   B->preallocated  = PETSC_TRUE;
2957   B->was_assembled = PETSC_FALSE;
2958   B->assembled     = PETSC_FALSE;
2959   PetscFunctionReturn(PETSC_SUCCESS);
2960 }
2961 
2962 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2963 {
2964   Mat         mat;
2965   Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;
2966 
2967   PetscFunctionBegin;
2968   *newmat = NULL;
2969   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2970   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2971   PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
2972   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2973   a = (Mat_MPIAIJ *)mat->data;
2974 
2975   mat->factortype = matin->factortype;
2976   mat->assembled  = matin->assembled;
2977   mat->insertmode = NOT_SET_VALUES;
2978 
2979   a->size         = oldmat->size;
2980   a->rank         = oldmat->rank;
2981   a->donotstash   = oldmat->donotstash;
2982   a->roworiented  = oldmat->roworiented;
2983   a->rowindices   = NULL;
2984   a->rowvalues    = NULL;
2985   a->getrowactive = PETSC_FALSE;
2986 
2987   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
2988   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2989   if (matin->hash_active) {
2990     PetscCall(MatSetUp(mat));
2991   } else {
2992     mat->preallocated = matin->preallocated;
2993     if (oldmat->colmap) {
2994 #if defined(PETSC_USE_CTABLE)
2995       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
2996 #else
2997       PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
2998       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
2999 #endif
3000     } else a->colmap = NULL;
3001     if (oldmat->garray) {
3002       PetscInt len;
3003       len = oldmat->B->cmap->n;
3004       PetscCall(PetscMalloc1(len + 1, &a->garray));
3005       if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3006     } else a->garray = NULL;
3007 
3008     /* It may happen MatDuplicate is called with a non-assembled matrix
3009       In fact, MatDuplicate only requires the matrix to be preallocated
3010       This may happen inside a DMCreateMatrix_Shell */
3011     if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3012     if (oldmat->Mvctx) PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
3013     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3014     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3015   }
3016   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3017   *newmat = mat;
3018   PetscFunctionReturn(PETSC_SUCCESS);
3019 }
3020 
3021 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3022 {
3023   PetscBool isbinary, ishdf5;
3024 
3025   PetscFunctionBegin;
3026   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3027   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3028   /* force binary viewer to load .info file if it has not yet done so */
3029   PetscCall(PetscViewerSetUp(viewer));
3030   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3031   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3032   if (isbinary) {
3033     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3034   } else if (ishdf5) {
3035 #if defined(PETSC_HAVE_HDF5)
3036     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3037 #else
3038     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3039 #endif
3040   } else {
3041     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3042   }
3043   PetscFunctionReturn(PETSC_SUCCESS);
3044 }
3045 
3046 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3047 {
3048   PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
3049   PetscInt    *rowidxs, *colidxs;
3050   PetscScalar *matvals;
3051 
3052   PetscFunctionBegin;
3053   PetscCall(PetscViewerSetUp(viewer));
3054 
3055   /* read in matrix header */
3056   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3057   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3058   M  = header[1];
3059   N  = header[2];
3060   nz = header[3];
3061   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3062   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3063   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");
3064 
3065   /* set block sizes from the viewer's .info file */
3066   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3067   /* set global sizes if not set already */
3068   if (mat->rmap->N < 0) mat->rmap->N = M;
3069   if (mat->cmap->N < 0) mat->cmap->N = N;
3070   PetscCall(PetscLayoutSetUp(mat->rmap));
3071   PetscCall(PetscLayoutSetUp(mat->cmap));
3072 
3073   /* check if the matrix sizes are correct */
3074   PetscCall(MatGetSize(mat, &rows, &cols));
3075   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3076 
3077   /* read in row lengths and build row indices */
3078   PetscCall(MatGetLocalSize(mat, &m, NULL));
3079   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3080   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3081   rowidxs[0] = 0;
3082   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3083   if (nz != PETSC_MAX_INT) {
3084     PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3085     PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3086   }
3087 
3088   /* read in column indices and matrix values */
3089   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3090   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3091   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3092   /* store matrix indices and values */
3093   PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
3094   PetscCall(PetscFree(rowidxs));
3095   PetscCall(PetscFree2(colidxs, matvals));
3096   PetscFunctionReturn(PETSC_SUCCESS);
3097 }
3098 
3099 /* Not scalable because of ISAllGather() unless getting all columns. */
3100 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3101 {
3102   IS          iscol_local;
3103   PetscBool   isstride;
3104   PetscMPIInt lisstride = 0, gisstride;
3105 
3106   PetscFunctionBegin;
3107   /* check if we are grabbing all columns*/
3108   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3109 
3110   if (isstride) {
3111     PetscInt start, len, mstart, mlen;
3112     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3113     PetscCall(ISGetLocalSize(iscol, &len));
3114     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3115     if (mstart == start && mlen - mstart == len) lisstride = 1;
3116   }
3117 
3118   PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3119   if (gisstride) {
3120     PetscInt N;
3121     PetscCall(MatGetSize(mat, NULL, &N));
3122     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3123     PetscCall(ISSetIdentity(iscol_local));
3124     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3125   } else {
3126     PetscInt cbs;
3127     PetscCall(ISGetBlockSize(iscol, &cbs));
3128     PetscCall(ISAllGather(iscol, &iscol_local));
3129     PetscCall(ISSetBlockSize(iscol_local, cbs));
3130   }
3131 
3132   *isseq = iscol_local;
3133   PetscFunctionReturn(PETSC_SUCCESS);
3134 }
3135 
3136 /*
3137  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3138  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3139 
3140  Input Parameters:
3141 +   mat - matrix
3142 .   isrow - parallel row index set; its local indices are a subset of local columns of `mat`,
3143            i.e., mat->rstart <= isrow[i] < mat->rend
3144 -   iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
3145            i.e., mat->cstart <= iscol[i] < mat->cend
3146 
3147  Output Parameters:
3148 +   isrow_d - sequential row index set for retrieving mat->A
3149 .   iscol_d - sequential  column index set for retrieving mat->A
3150 .   iscol_o - sequential column index set for retrieving mat->B
3151 -   garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
3152  */
3153 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
3154 {
3155   Vec             x, cmap;
3156   const PetscInt *is_idx;
3157   PetscScalar    *xarray, *cmaparray;
3158   PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
3159   Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
3160   Mat             B    = a->B;
3161   Vec             lvec = a->lvec, lcmap;
3162   PetscInt        i, cstart, cend, Bn = B->cmap->N;
3163   MPI_Comm        comm;
3164   VecScatter      Mvctx = a->Mvctx;
3165 
3166   PetscFunctionBegin;
3167   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3168   PetscCall(ISGetLocalSize(iscol, &ncols));
3169 
3170   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3171   PetscCall(MatCreateVecs(mat, &x, NULL));
3172   PetscCall(VecSet(x, -1.0));
3173   PetscCall(VecDuplicate(x, &cmap));
3174   PetscCall(VecSet(cmap, -1.0));
3175 
3176   /* Get start indices */
3177   PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
3178   isstart -= ncols;
3179   PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));
3180 
3181   PetscCall(ISGetIndices(iscol, &is_idx));
3182   PetscCall(VecGetArray(x, &xarray));
3183   PetscCall(VecGetArray(cmap, &cmaparray));
3184   PetscCall(PetscMalloc1(ncols, &idx));
3185   for (i = 0; i < ncols; i++) {
3186     xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
3187     cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
3188     idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
3189   }
3190   PetscCall(VecRestoreArray(x, &xarray));
3191   PetscCall(VecRestoreArray(cmap, &cmaparray));
3192   PetscCall(ISRestoreIndices(iscol, &is_idx));
3193 
3194   /* Get iscol_d */
3195   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
3196   PetscCall(ISGetBlockSize(iscol, &i));
3197   PetscCall(ISSetBlockSize(*iscol_d, i));
3198 
3199   /* Get isrow_d */
3200   PetscCall(ISGetLocalSize(isrow, &m));
3201   rstart = mat->rmap->rstart;
3202   PetscCall(PetscMalloc1(m, &idx));
3203   PetscCall(ISGetIndices(isrow, &is_idx));
3204   for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
3205   PetscCall(ISRestoreIndices(isrow, &is_idx));
3206 
3207   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
3208   PetscCall(ISGetBlockSize(isrow, &i));
3209   PetscCall(ISSetBlockSize(*isrow_d, i));
3210 
3211   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3212   PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3213   PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3214 
3215   PetscCall(VecDuplicate(lvec, &lcmap));
3216 
3217   PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3218   PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3219 
3220   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3221   /* off-process column indices */
3222   count = 0;
3223   PetscCall(PetscMalloc1(Bn, &idx));
3224   PetscCall(PetscMalloc1(Bn, &cmap1));
3225 
3226   PetscCall(VecGetArray(lvec, &xarray));
3227   PetscCall(VecGetArray(lcmap, &cmaparray));
3228   for (i = 0; i < Bn; i++) {
3229     if (PetscRealPart(xarray[i]) > -1.0) {
3230       idx[count]   = i;                                     /* local column index in off-diagonal part B */
3231       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
3232       count++;
3233     }
3234   }
3235   PetscCall(VecRestoreArray(lvec, &xarray));
3236   PetscCall(VecRestoreArray(lcmap, &cmaparray));
3237 
3238   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
3239   /* cannot ensure iscol_o has same blocksize as iscol! */
3240 
3241   PetscCall(PetscFree(idx));
3242   *garray = cmap1;
3243 
3244   PetscCall(VecDestroy(&x));
3245   PetscCall(VecDestroy(&cmap));
3246   PetscCall(VecDestroy(&lcmap));
3247   PetscFunctionReturn(PETSC_SUCCESS);
3248 }
3249 
3250 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3251 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
3252 {
3253   Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
3254   Mat         M = NULL;
3255   MPI_Comm    comm;
3256   IS          iscol_d, isrow_d, iscol_o;
3257   Mat         Asub = NULL, Bsub = NULL;
3258   PetscInt    n;
3259 
3260   PetscFunctionBegin;
3261   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3262 
3263   if (call == MAT_REUSE_MATRIX) {
3264     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3265     PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
3266     PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");
3267 
3268     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
3269     PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");
3270 
3271     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
3272     PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");
3273 
3274     /* Update diagonal and off-diagonal portions of submat */
3275     asub = (Mat_MPIAIJ *)(*submat)->data;
3276     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
3277     PetscCall(ISGetLocalSize(iscol_o, &n));
3278     if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
3279     PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
3280     PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));
3281 
3282   } else { /* call == MAT_INITIAL_MATRIX) */
3283     const PetscInt *garray;
3284     PetscInt        BsubN;
3285 
3286     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3287     PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));
3288 
3289     /* Create local submatrices Asub and Bsub */
3290     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
3291     PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));
3292 
3293     /* Create submatrix M */
3294     PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));
3295 
3296     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3297     asub = (Mat_MPIAIJ *)M->data;
3298 
3299     PetscCall(ISGetLocalSize(iscol_o, &BsubN));
3300     n = asub->B->cmap->N;
3301     if (BsubN > n) {
3302       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3303       const PetscInt *idx;
3304       PetscInt        i, j, *idx_new, *subgarray = asub->garray;
3305       PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));
3306 
3307       PetscCall(PetscMalloc1(n, &idx_new));
3308       j = 0;
3309       PetscCall(ISGetIndices(iscol_o, &idx));
3310       for (i = 0; i < n; i++) {
3311         if (j >= BsubN) break;
3312         while (subgarray[i] > garray[j]) j++;
3313 
3314         if (subgarray[i] == garray[j]) {
3315           idx_new[i] = idx[j++];
3316         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
3317       }
3318       PetscCall(ISRestoreIndices(iscol_o, &idx));
3319 
3320       PetscCall(ISDestroy(&iscol_o));
3321       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));
3322 
3323     } else if (BsubN < n) {
3324       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
3325     }
3326 
3327     PetscCall(PetscFree(garray));
3328     *submat = M;
3329 
3330     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3331     PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
3332     PetscCall(ISDestroy(&isrow_d));
3333 
3334     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
3335     PetscCall(ISDestroy(&iscol_d));
3336 
3337     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
3338     PetscCall(ISDestroy(&iscol_o));
3339   }
3340   PetscFunctionReturn(PETSC_SUCCESS);
3341 }
3342 
3343 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
3344 {
3345   IS        iscol_local = NULL, isrow_d;
3346   PetscInt  csize;
3347   PetscInt  n, i, j, start, end;
3348   PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
3349   MPI_Comm  comm;
3350 
3351   PetscFunctionBegin;
3352   /* If isrow has same processor distribution as mat,
3353      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3354   if (call == MAT_REUSE_MATRIX) {
3355     PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
3356     if (isrow_d) {
3357       sameRowDist  = PETSC_TRUE;
3358       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3359     } else {
3360       PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
3361       if (iscol_local) {
3362         sameRowDist  = PETSC_TRUE;
3363         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3364       }
3365     }
3366   } else {
3367     /* Check if isrow has same processor distribution as mat */
3368     sameDist[0] = PETSC_FALSE;
3369     PetscCall(ISGetLocalSize(isrow, &n));
3370     if (!n) {
3371       sameDist[0] = PETSC_TRUE;
3372     } else {
3373       PetscCall(ISGetMinMax(isrow, &i, &j));
3374       PetscCall(MatGetOwnershipRange(mat, &start, &end));
3375       if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
3376     }
3377 
3378     /* Check if iscol has same processor distribution as mat */
3379     sameDist[1] = PETSC_FALSE;
3380     PetscCall(ISGetLocalSize(iscol, &n));
3381     if (!n) {
3382       sameDist[1] = PETSC_TRUE;
3383     } else {
3384       PetscCall(ISGetMinMax(iscol, &i, &j));
3385       PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
3386       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3387     }
3388 
3389     PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3390     PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
3391     sameRowDist = tsameDist[0];
3392   }
3393 
3394   if (sameRowDist) {
3395     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3396       /* isrow and iscol have same processor distribution as mat */
3397       PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
3398       PetscFunctionReturn(PETSC_SUCCESS);
3399     } else { /* sameRowDist */
3400       /* isrow has same processor distribution as mat */
3401       if (call == MAT_INITIAL_MATRIX) {
3402         PetscBool sorted;
3403         PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3404         PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
3405         PetscCall(ISGetSize(iscol, &i));
3406         PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);
3407 
3408         PetscCall(ISSorted(iscol_local, &sorted));
3409         if (sorted) {
3410           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3411           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
3412           PetscFunctionReturn(PETSC_SUCCESS);
3413         }
3414       } else { /* call == MAT_REUSE_MATRIX */
3415         IS iscol_sub;
3416         PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3417         if (iscol_sub) {
3418           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
3419           PetscFunctionReturn(PETSC_SUCCESS);
3420         }
3421       }
3422     }
3423   }
3424 
3425   /* General case: iscol -> iscol_local which has global size of iscol */
3426   if (call == MAT_REUSE_MATRIX) {
3427     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
3428     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3429   } else {
3430     if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3431   }
3432 
3433   PetscCall(ISGetLocalSize(iscol, &csize));
3434   PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));
3435 
3436   if (call == MAT_INITIAL_MATRIX) {
3437     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3438     PetscCall(ISDestroy(&iscol_local));
3439   }
3440   PetscFunctionReturn(PETSC_SUCCESS);
3441 }
3442 
3443 /*@C
3444   MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3445   and "off-diagonal" part of the matrix in CSR format.
3446 
3447   Collective
3448 
3449   Input Parameters:
3450 + comm   - MPI communicator
3451 . A      - "diagonal" portion of matrix
3452 . B      - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3453 - garray - global index of `B` columns
3454 
3455   Output Parameter:
3456 . mat - the matrix, with input `A` as its local diagonal matrix
3457 
3458   Level: advanced
3459 
3460   Notes:
3461   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3462 
3463   `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore.
3464 
3465 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3466 @*/
3467 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
3468 {
3469   Mat_MPIAIJ        *maij;
3470   Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
3471   PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
3472   const PetscScalar *oa;
3473   Mat                Bnew;
3474   PetscInt           m, n, N;
3475   MatType            mpi_mat_type;
3476 
3477   PetscFunctionBegin;
3478   PetscCall(MatCreate(comm, mat));
3479   PetscCall(MatGetSize(A, &m, &n));
3480   PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
3481   PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
3482   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3483   /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */
3484 
3485   /* Get global columns of mat */
3486   PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));
3487 
3488   PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
3489   /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
3490   PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
3491   PetscCall(MatSetType(*mat, mpi_mat_type));
3492 
3493   if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
3494   maij = (Mat_MPIAIJ *)(*mat)->data;
3495 
3496   (*mat)->preallocated = PETSC_TRUE;
3497 
3498   PetscCall(PetscLayoutSetUp((*mat)->rmap));
3499   PetscCall(PetscLayoutSetUp((*mat)->cmap));
3500 
3501   /* Set A as diagonal portion of *mat */
3502   maij->A = A;
3503 
3504   nz = oi[m];
3505   for (i = 0; i < nz; i++) {
3506     col   = oj[i];
3507     oj[i] = garray[col];
3508   }
3509 
3510   /* Set Bnew as off-diagonal portion of *mat */
3511   PetscCall(MatSeqAIJGetArrayRead(B, &oa));
3512   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
3513   PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
3514   bnew        = (Mat_SeqAIJ *)Bnew->data;
3515   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3516   maij->B     = Bnew;
3517 
3518   PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);
3519 
3520   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3521   b->free_a       = PETSC_FALSE;
3522   b->free_ij      = PETSC_FALSE;
3523   PetscCall(MatDestroy(&B));
3524 
3525   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3526   bnew->free_a       = PETSC_TRUE;
3527   bnew->free_ij      = PETSC_TRUE;
3528 
3529   /* condense columns of maij->B */
3530   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3531   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
3532   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
3533   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
3534   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3535   PetscFunctionReturn(PETSC_SUCCESS);
3536 }
3537 
3538 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3539 
3540 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
3541 {
3542   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
3543   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3544   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
3545   Mat             M, Msub, B = a->B;
3546   MatScalar      *aa;
3547   Mat_SeqAIJ     *aij;
3548   PetscInt       *garray = a->garray, *colsub, Ncols;
3549   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
3550   IS              iscol_sub, iscmap;
3551   const PetscInt *is_idx, *cmap;
3552   PetscBool       allcolumns = PETSC_FALSE;
3553   MPI_Comm        comm;
3554 
3555   PetscFunctionBegin;
3556   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3557   if (call == MAT_REUSE_MATRIX) {
3558     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3559     PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
3560     PetscCall(ISGetLocalSize(iscol_sub, &count));
3561 
3562     PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
3563     PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");
3564 
3565     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
3566     PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3567 
3568     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
3569 
3570   } else { /* call == MAT_INITIAL_MATRIX) */
3571     PetscBool flg;
3572 
3573     PetscCall(ISGetLocalSize(iscol, &n));
3574     PetscCall(ISGetSize(iscol, &Ncols));
3575 
3576     /* (1) iscol -> nonscalable iscol_local */
3577     /* Check for special case: each processor gets entire matrix columns */
3578     PetscCall(ISIdentity(iscol_local, &flg));
3579     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3580     PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3581     if (allcolumns) {
3582       iscol_sub = iscol_local;
3583       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3584       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
3585 
3586     } else {
3587       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3588       PetscInt *idx, *cmap1, k;
3589       PetscCall(PetscMalloc1(Ncols, &idx));
3590       PetscCall(PetscMalloc1(Ncols, &cmap1));
3591       PetscCall(ISGetIndices(iscol_local, &is_idx));
3592       count = 0;
3593       k     = 0;
3594       for (i = 0; i < Ncols; i++) {
3595         j = is_idx[i];
3596         if (j >= cstart && j < cend) {
3597           /* diagonal part of mat */
3598           idx[count]     = j;
3599           cmap1[count++] = i; /* column index in submat */
3600         } else if (Bn) {
3601           /* off-diagonal part of mat */
3602           if (j == garray[k]) {
3603             idx[count]     = j;
3604             cmap1[count++] = i; /* column index in submat */
3605           } else if (j > garray[k]) {
3606             while (j > garray[k] && k < Bn - 1) k++;
3607             if (j == garray[k]) {
3608               idx[count]     = j;
3609               cmap1[count++] = i; /* column index in submat */
3610             }
3611           }
3612         }
3613       }
3614       PetscCall(ISRestoreIndices(iscol_local, &is_idx));
3615 
3616       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
3617       PetscCall(ISGetBlockSize(iscol, &cbs));
3618       PetscCall(ISSetBlockSize(iscol_sub, cbs));
3619 
3620       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
3621     }
3622 
3623     /* (3) Create sequential Msub */
3624     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
3625   }
3626 
3627   PetscCall(ISGetLocalSize(iscol_sub, &count));
3628   aij = (Mat_SeqAIJ *)(Msub)->data;
3629   ii  = aij->i;
3630   PetscCall(ISGetIndices(iscmap, &cmap));
3631 
3632   /*
3633       m - number of local rows
3634       Ncols - number of columns (same on all processors)
3635       rstart - first row in new global matrix generated
3636   */
3637   PetscCall(MatGetSize(Msub, &m, NULL));
3638 
3639   if (call == MAT_INITIAL_MATRIX) {
3640     /* (4) Create parallel newmat */
3641     PetscMPIInt rank, size;
3642     PetscInt    csize;
3643 
3644     PetscCallMPI(MPI_Comm_size(comm, &size));
3645     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3646 
3647     /*
3648         Determine the number of non-zeros in the diagonal and off-diagonal
3649         portions of the matrix in order to do correct preallocation
3650     */
3651 
3652     /* first get start and end of "diagonal" columns */
3653     PetscCall(ISGetLocalSize(iscol, &csize));
3654     if (csize == PETSC_DECIDE) {
3655       PetscCall(ISGetSize(isrow, &mglobal));
3656       if (mglobal == Ncols) { /* square matrix */
3657         nlocal = m;
3658       } else {
3659         nlocal = Ncols / size + ((Ncols % size) > rank);
3660       }
3661     } else {
3662       nlocal = csize;
3663     }
3664     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3665     rstart = rend - nlocal;
3666     PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);
3667 
3668     /* next, compute all the lengths */
3669     jj = aij->j;
3670     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3671     olens = dlens + m;
3672     for (i = 0; i < m; i++) {
3673       jend = ii[i + 1] - ii[i];
3674       olen = 0;
3675       dlen = 0;
3676       for (j = 0; j < jend; j++) {
3677         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3678         else dlen++;
3679         jj++;
3680       }
3681       olens[i] = olen;
3682       dlens[i] = dlen;
3683     }
3684 
3685     PetscCall(ISGetBlockSize(isrow, &bs));
3686     PetscCall(ISGetBlockSize(iscol, &cbs));
3687 
3688     PetscCall(MatCreate(comm, &M));
3689     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
3690     PetscCall(MatSetBlockSizes(M, bs, cbs));
3691     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3692     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3693     PetscCall(PetscFree(dlens));
3694 
3695   } else { /* call == MAT_REUSE_MATRIX */
3696     M = *newmat;
3697     PetscCall(MatGetLocalSize(M, &i, NULL));
3698     PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3699     PetscCall(MatZeroEntries(M));
3700     /*
3701          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3702        rather than the slower MatSetValues().
3703     */
3704     M->was_assembled = PETSC_TRUE;
3705     M->assembled     = PETSC_FALSE;
3706   }
3707 
3708   /* (5) Set values of Msub to *newmat */
3709   PetscCall(PetscMalloc1(count, &colsub));
3710   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
3711 
3712   jj = aij->j;
3713   PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
3714   for (i = 0; i < m; i++) {
3715     row = rstart + i;
3716     nz  = ii[i + 1] - ii[i];
3717     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
3718     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
3719     jj += nz;
3720     aa += nz;
3721   }
3722   PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
3723   PetscCall(ISRestoreIndices(iscmap, &cmap));
3724 
3725   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3726   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3727 
3728   PetscCall(PetscFree(colsub));
3729 
3730   /* save Msub, iscol_sub and iscmap used in processor for next request */
3731   if (call == MAT_INITIAL_MATRIX) {
3732     *newmat = M;
3733     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub));
3734     PetscCall(MatDestroy(&Msub));
3735 
3736     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub));
3737     PetscCall(ISDestroy(&iscol_sub));
3738 
3739     PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap));
3740     PetscCall(ISDestroy(&iscmap));
3741 
3742     if (iscol_local) {
3743       PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3744       PetscCall(ISDestroy(&iscol_local));
3745     }
3746   }
3747   PetscFunctionReturn(PETSC_SUCCESS);
3748 }
3749 
3750 /*
3751     Not great since it makes two copies of the submatrix, first an SeqAIJ
3752   in local and then by concatenating the local matrices the end result.
3753   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3754 
3755   This requires a sequential iscol with all indices.
3756 */
3757 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
3758 {
3759   PetscMPIInt rank, size;
3760   PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
3761   PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3762   Mat         M, Mreuse;
3763   MatScalar  *aa, *vwork;
3764   MPI_Comm    comm;
3765   Mat_SeqAIJ *aij;
3766   PetscBool   colflag, allcolumns = PETSC_FALSE;
3767 
3768   PetscFunctionBegin;
3769   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3770   PetscCallMPI(MPI_Comm_rank(comm, &rank));
3771   PetscCallMPI(MPI_Comm_size(comm, &size));
3772 
3773   /* Check for special case: each processor gets entire matrix columns */
3774   PetscCall(ISIdentity(iscol, &colflag));
3775   PetscCall(ISGetLocalSize(iscol, &n));
3776   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3777   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3778 
3779   if (call == MAT_REUSE_MATRIX) {
3780     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
3781     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3782     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
3783   } else {
3784     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
3785   }
3786 
3787   /*
3788       m - number of local rows
3789       n - number of columns (same on all processors)
3790       rstart - first row in new global matrix generated
3791   */
3792   PetscCall(MatGetSize(Mreuse, &m, &n));
3793   PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
3794   if (call == MAT_INITIAL_MATRIX) {
3795     aij = (Mat_SeqAIJ *)(Mreuse)->data;
3796     ii  = aij->i;
3797     jj  = aij->j;
3798 
3799     /*
3800         Determine the number of non-zeros in the diagonal and off-diagonal
3801         portions of the matrix in order to do correct preallocation
3802     */
3803 
3804     /* first get start and end of "diagonal" columns */
3805     if (csize == PETSC_DECIDE) {
3806       PetscCall(ISGetSize(isrow, &mglobal));
3807       if (mglobal == n) { /* square matrix */
3808         nlocal = m;
3809       } else {
3810         nlocal = n / size + ((n % size) > rank);
3811       }
3812     } else {
3813       nlocal = csize;
3814     }
3815     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3816     rstart = rend - nlocal;
3817     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
3818 
3819     /* next, compute all the lengths */
3820     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3821     olens = dlens + m;
3822     for (i = 0; i < m; i++) {
3823       jend = ii[i + 1] - ii[i];
3824       olen = 0;
3825       dlen = 0;
3826       for (j = 0; j < jend; j++) {
3827         if (*jj < rstart || *jj >= rend) olen++;
3828         else dlen++;
3829         jj++;
3830       }
3831       olens[i] = olen;
3832       dlens[i] = dlen;
3833     }
3834     PetscCall(MatCreate(comm, &M));
3835     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
3836     PetscCall(MatSetBlockSizes(M, bs, cbs));
3837     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3838     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3839     PetscCall(PetscFree(dlens));
3840   } else {
3841     PetscInt ml, nl;
3842 
3843     M = *newmat;
3844     PetscCall(MatGetLocalSize(M, &ml, &nl));
3845     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3846     PetscCall(MatZeroEntries(M));
3847     /*
3848          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3849        rather than the slower MatSetValues().
3850     */
3851     M->was_assembled = PETSC_TRUE;
3852     M->assembled     = PETSC_FALSE;
3853   }
3854   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
3855   aij = (Mat_SeqAIJ *)(Mreuse)->data;
3856   ii  = aij->i;
3857   jj  = aij->j;
3858 
3859   /* trigger copy to CPU if needed */
3860   PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
3861   for (i = 0; i < m; i++) {
3862     row   = rstart + i;
3863     nz    = ii[i + 1] - ii[i];
3864     cwork = jj;
3865     jj    = PetscSafePointerPlusOffset(jj, nz);
3866     vwork = aa;
3867     aa    = PetscSafePointerPlusOffset(aa, nz);
3868     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
3869   }
3870   PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));
3871 
3872   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3873   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3874   *newmat = M;
3875 
3876   /* save submatrix used in processor for next request */
3877   if (call == MAT_INITIAL_MATRIX) {
3878     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
3879     PetscCall(MatDestroy(&Mreuse));
3880   }
3881   PetscFunctionReturn(PETSC_SUCCESS);
3882 }
3883 
3884 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3885 {
3886   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3887   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
3888   const PetscInt *JJ;
3889   PetscBool       nooffprocentries;
3890   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3891 
3892   PetscFunctionBegin;
3893   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);
3894 
3895   PetscCall(PetscLayoutSetUp(B->rmap));
3896   PetscCall(PetscLayoutSetUp(B->cmap));
3897   m      = B->rmap->n;
3898   cstart = B->cmap->rstart;
3899   cend   = B->cmap->rend;
3900   rstart = B->rmap->rstart;
3901 
3902   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3903 
3904   if (PetscDefined(USE_DEBUG)) {
3905     for (i = 0; i < m; i++) {
3906       nnz = Ii[i + 1] - Ii[i];
3907       JJ  = PetscSafePointerPlusOffset(J, Ii[i]);
3908       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3909       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3910       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3911     }
3912   }
3913 
3914   for (i = 0; i < m; i++) {
3915     nnz     = Ii[i + 1] - Ii[i];
3916     JJ      = PetscSafePointerPlusOffset(J, Ii[i]);
3917     nnz_max = PetscMax(nnz_max, nnz);
3918     d       = 0;
3919     for (j = 0; j < nnz; j++) {
3920       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3921     }
3922     d_nnz[i] = d;
3923     o_nnz[i] = nnz - d;
3924   }
3925   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3926   PetscCall(PetscFree2(d_nnz, o_nnz));
3927 
3928   for (i = 0; i < m; i++) {
3929     ii = i + rstart;
3930     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i]), PetscSafePointerPlusOffset(v, Ii[i]), INSERT_VALUES));
3931   }
3932   nooffprocentries    = B->nooffprocentries;
3933   B->nooffprocentries = PETSC_TRUE;
3934   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3935   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3936   B->nooffprocentries = nooffprocentries;
3937 
3938   /* count number of entries below block diagonal */
3939   PetscCall(PetscFree(Aij->ld));
3940   PetscCall(PetscCalloc1(m, &ld));
3941   Aij->ld = ld;
3942   for (i = 0; i < m; i++) {
3943     nnz = Ii[i + 1] - Ii[i];
3944     j   = 0;
3945     while (j < nnz && J[j] < cstart) j++;
3946     ld[i] = j;
3947     if (J) J += nnz;
3948   }
3949 
3950   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3951   PetscFunctionReturn(PETSC_SUCCESS);
3952 }
3953 
3954 /*@
3955   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3956   (the default parallel PETSc format).
3957 
3958   Collective
3959 
3960   Input Parameters:
3961 + B - the matrix
3962 . i - the indices into `j` for the start of each local row (indices start with zero)
3963 . j - the column indices for each local row (indices start with zero)
3964 - v - optional values in the matrix
3965 
3966   Level: developer
3967 
3968   Notes:
3969   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
3970   thus you CANNOT change the matrix entries by changing the values of `v` after you have
3971   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3972 
3973   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
3974 
3975   A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.
3976 
3977   You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.
3978 
3979   If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
3980   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
3981 
3982   The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering, i.e., for the following matrix, the input data expected is
3984   as shown
3985 .vb
3986         1 0 0
3987         2 0 3     P0
3988        -------
3989         4 5 6     P1
3990 
3991      Process0 [P0] rows_owned=[0,1]
3992         i =  {0,1,3}  [size = nrow+1  = 2+1]
3993         j =  {0,0,2}  [size = 3]
3994         v =  {1,2,3}  [size = 3]
3995 
3996      Process1 [P1] rows_owned=[2]
3997         i =  {0,3}    [size = nrow+1  = 1+1]
3998         j =  {0,1,2}  [size = 3]
3999         v =  {4,5,6}  [size = 3]
4000 .ve
4001 
4002 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
4003           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4004 @*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* Forward the arguments unchanged to the method looked up on B under the name "MatMPIAIJSetPreallocationCSR_C" */
  /* NOTE(review): unlike MatMPIAIJSetPreallocation(), B is not checked with PetscValidHeaderSpecific()/PetscValidType()
     before the dispatch - confirm whether that is intentional */
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4011 
4012 /*@C
4013   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
4014   (the default parallel PETSc format).  For good matrix assembly performance
4015   the user should preallocate the matrix storage by setting the parameters
4016   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4017 
4018   Collective
4019 
4020   Input Parameters:
4021 + B     - the matrix
4022 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4023            (same value is used for all local rows)
4024 . d_nnz - array containing the number of nonzeros in the various rows of the
4025            DIAGONAL portion of the local submatrix (possibly different for each row)
4026            or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4027            The size of this array is equal to the number of local rows, i.e 'm'.
4028            For matrices that will be factored, you must leave room for (and set)
4029            the diagonal entry even if it is zero.
4030 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4031            submatrix (same value is used for all local rows).
4032 - o_nnz - array containing the number of nonzeros in the various rows of the
4033            OFF-DIAGONAL portion of the local submatrix (possibly different for
4034            each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4035            structure. The size of this array is equal to the number
4036            of local rows, i.e 'm'.
4037 
4038   Example Usage:
4039   Consider the following 8x8 matrix with 34 non-zero values, that is
4040   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4041   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4042   as follows
4043 
4044 .vb
4045             1  2  0  |  0  3  0  |  0  4
4046     Proc0   0  5  6  |  7  0  0  |  8  0
4047             9  0 10  | 11  0  0  | 12  0
4048     -------------------------------------
4049            13  0 14  | 15 16 17  |  0  0
4050     Proc1   0 18  0  | 19 20 21  |  0  0
4051             0  0  0  | 22 23  0  | 24  0
4052     -------------------------------------
4053     Proc2  25 26 27  |  0  0 28  | 29  0
4054            30  0  0  | 31 32 33  |  0 34
4055 .ve
4056 
4057   This can be represented as a collection of submatrices as
4058 .vb
4059       A B C
4060       D E F
4061       G H I
4062 .ve
4063 
4064   Where the submatrices A,B,C are owned by proc0, D,E,F are
4065   owned by proc1, G,H,I are owned by proc2.
4066 
4067   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4068   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4069   The 'M','N' parameters are 8,8, and have the same values on all procs.
4070 
4071   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4072   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4073   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4074   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4075   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.
4077 
4078   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4079   allocated for every row of the local diagonal submatrix, and `o_nz`
4080   storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
4082   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4083   In this case, the values of `d_nz`, `o_nz` are
4084 .vb
     proc0  d_nz = 2, o_nz = 2
     proc1  d_nz = 3, o_nz = 2
     proc2  d_nz = 1, o_nz = 4
4088 .ve
  We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2, i.e., we are using 12+15+10=37 storage locations to store
  34 values.
4093 
4094   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4095   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4096   In the above case the values for `d_nnz`, `o_nnz` are
4097 .vb
4098      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4099      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4100      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4101 .ve
4102   Here the space allocated is sum of all the above values i.e 34, and
4103   hence pre-allocation is perfect.
4104 
4105   Level: intermediate
4106 
4107   Notes:
4108   If the *_nnz parameter is given then the *_nz parameter is ignored
4109 
4110   The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4111   storage.  The stored row and column indices begin with zero.
4112   See [Sparse Matrices](sec_matsparse) for details.
4113 
4114   The parallel matrix is partitioned such that the first m0 rows belong to
4115   process 0, the next m1 rows belong to process 1, the next m2 rows belong
  to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
4117 
  The DIAGONAL portion of the local submatrix of a processor can be defined
  as the submatrix which is obtained by extracting the part corresponding to
  the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
  first row that belongs to the processor, r2 is the last row belonging to
  this processor, and c1-c2 is the range of indices of the local part of a
  vector suitable for applying the matrix to.  This is an mxn matrix.  In the
  common case of a square matrix, the row and column ranges are the same and
  the DIAGONAL part is also square. The remaining portion of the local
  submatrix (mxN) constitutes the OFF-DIAGONAL portion.
4127 
4128   If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4129 
4130   You can call `MatGetInfo()` to get information on how effective the preallocation was;
4131   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4132   You can also run with the option `-info` and look for messages with the string
4133   malloc in them to see if additional memory allocation was needed.
4134 
4135 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4136           `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4137 @*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  /* Validate B (argument number 1) before dispatching */
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* Forward the arguments unchanged to the method looked up on B under the name "MatMPIAIJSetPreallocation_C" */
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4146 
4147 /*@
  MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows
  in standard CSR format.
4150 
4151   Collective
4152 
4153   Input Parameters:
4154 + comm - MPI communicator
4155 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
. n    - This value should be the same as the local size used in creating the
         x vector for the matrix-vector product $y = Ax$ (or `PETSC_DECIDE` to have it
         calculated if `N` is given). For square matrices `n` is almost always `m`.
. M    - number of global rows (or `PETSC_DETERMINE` to have it calculated if `m` is given)
. N    - number of global columns (or `PETSC_DETERMINE` to have it calculated if `n` is given)
4161 . i    - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4162 . j    - global column indices
4163 - a    - optional matrix values
4164 
4165   Output Parameter:
4166 . mat - the matrix
4167 
4168   Level: intermediate
4169 
4170   Notes:
4171   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4172   thus you CANNOT change the matrix entries by changing the values of `a[]` after you have
4173   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4174 
4175   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
4176 
4177   Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()`
4178 
4179   If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use
4180   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
4181 
4182   The format which is used for the sparse matrix input, is equivalent to a
4183   row-major ordering, i.e., for the following matrix, the input data expected is
4184   as shown
4185 .vb
4186         1 0 0
4187         2 0 3     P0
4188        -------
4189         4 5 6     P1
4190 
4191      Process0 [P0] rows_owned=[0,1]
4192         i =  {0,1,3}  [size = nrow+1  = 2+1]
4193         j =  {0,0,2}  [size = 3]
4194         v =  {1,2,3}  [size = 3]
4195 
4196      Process1 [P1] rows_owned=[2]
4197         i =  {0,3}    [size = nrow+1  = 1+1]
4198         j =  {0,1,2}  [size = 3]
4199         v =  {4,5,6}  [size = 3]
4200 .ve
4201 
4202 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4203           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4204 @*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  /* i[0] is only inspected when i is non-NULL, so a NULL i passes this check */
  PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  /* no block sizes are set here: this routine has no bs/cbs arguments */
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  /* preallocation and the copy of i, j, a are delegated to MatMPIAIJSetPreallocationCSR() */
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4217 
4218 /*@
  MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
  CSR format. Only the numerical values are updated; the other arrays must be identical to what was passed
  to `MatCreateMPIAIJWithArrays()`
4222 
4223   Deprecated: Use `MatUpdateMPIAIJWithArray()`
4224 
4225   Collective
4226 
4227   Input Parameters:
4228 + mat - the matrix
4229 . m   - number of local rows (Cannot be `PETSC_DECIDE`)
4230 . n   - This value should be the same as the local size used in creating the
4231        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4232        calculated if N is given) For square matrices n is almost always m.
4233 . M   - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4234 . N   - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4235 . Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4236 . J   - column indices
4237 - v   - matrix values
4238 
4239   Level: deprecated
4240 
4241 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4242           `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4243 @*/
4244 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
4245 {
4246   PetscInt        nnz, i;
4247   PetscBool       nooffprocentries;
4248   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4249   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4250   PetscScalar    *ad, *ao;
4251   PetscInt        ldi, Iii, md;
4252   const PetscInt *Adi = Ad->i;
4253   PetscInt       *ld  = Aij->ld;
4254 
4255   PetscFunctionBegin;
4256   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4257   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4258   PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4259   PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4260 
4261   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4262   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4263 
4264   for (i = 0; i < m; i++) {
4265     if (PetscDefined(USE_DEBUG)) {
4266       for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) {
4267         PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i);
4268         PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i);
4269       }
4270     }
4271     nnz = Ii[i + 1] - Ii[i];
4272     Iii = Ii[i];
4273     ldi = ld[i];
4274     md  = Adi[i + 1] - Adi[i];
4275     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4276     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4277     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4278     ad += md;
4279     ao += nnz - md;
4280   }
4281   nooffprocentries      = mat->nooffprocentries;
4282   mat->nooffprocentries = PETSC_TRUE;
4283   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4284   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4285   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4286   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4287   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4288   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4289   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4290   mat->nooffprocentries = nooffprocentries;
4291   PetscFunctionReturn(PETSC_SUCCESS);
4292 }
4293 
4294 /*@
4295   MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values
4296 
4297   Collective
4298 
4299   Input Parameters:
4300 + mat - the matrix
4301 - v   - matrix values, stored by row
4302 
4303   Level: intermediate
4304 
4305   Notes:
4306   The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4307 
4308   The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly
4309 
4310 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4311           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4312 @*/
4313 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4314 {
4315   PetscInt        nnz, i, m;
4316   PetscBool       nooffprocentries;
4317   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4318   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4319   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4320   PetscScalar    *ad, *ao;
4321   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4322   PetscInt        ldi, Iii, md;
4323   PetscInt       *ld = Aij->ld;
4324 
4325   PetscFunctionBegin;
4326   m = mat->rmap->n;
4327 
4328   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4329   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4330   Iii = 0;
4331   for (i = 0; i < m; i++) {
4332     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4333     ldi = ld[i];
4334     md  = Adi[i + 1] - Adi[i];
4335     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4336     ad += md;
4337     if (ao) {
4338       PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4339       PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4340       ao += nnz - md;
4341     }
4342     Iii += nnz;
4343   }
4344   nooffprocentries      = mat->nooffprocentries;
4345   mat->nooffprocentries = PETSC_TRUE;
4346   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4347   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4348   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4349   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4350   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4351   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4352   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4353   mat->nooffprocentries = nooffprocentries;
4354   PetscFunctionReturn(PETSC_SUCCESS);
4355 }
4356 
4357 /*@C
4358   MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4359   (the default parallel PETSc format).  For good matrix assembly performance
4360   the user should preallocate the matrix storage by setting the parameters
4361   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4362 
4363   Collective
4364 
4365   Input Parameters:
4366 + comm  - MPI communicator
4367 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4368           This value should be the same as the local size used in creating the
4369           y vector for the matrix-vector product y = Ax.
4370 . n     - This value should be the same as the local size used in creating the
4371           x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4372           calculated if N is given) For square matrices n is almost always m.
4373 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4374 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4375 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4376           (same value is used for all local rows)
4377 . d_nnz - array containing the number of nonzeros in the various rows of the
4378           DIAGONAL portion of the local submatrix (possibly different for each row)
4379           or `NULL`, if `d_nz` is used to specify the nonzero structure.
4380           The size of this array is equal to the number of local rows, i.e 'm'.
4381 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4382           submatrix (same value is used for all local rows).
4383 - o_nnz - array containing the number of nonzeros in the various rows of the
4384           OFF-DIAGONAL portion of the local submatrix (possibly different for
4385           each row) or `NULL`, if `o_nz` is used to specify the nonzero
4386           structure. The size of this array is equal to the number
4387           of local rows, i.e 'm'.
4388 
4389   Output Parameter:
4390 . A - the matrix
4391 
4392   Options Database Keys:
4393 + -mat_no_inode                     - Do not use inodes
4394 . -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
4395 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4396                                       See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter`
4397                                       to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4398 
4399   Level: intermediate
4400 
4401   Notes:
4402   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4403   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4404   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4405 
4406   If the *_nnz parameter is given then the *_nz parameter is ignored
4407 
4408   The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4409   processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4410   storage requirements for this matrix.
4411 
4412   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
4413   processor then it must be used on all processors that share the object for
4414   that argument.
4415 
4416   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
4417   `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
4418 
4419   The user MUST specify either the local or global matrix dimensions
4420   (possibly both).
4421 
4422   The parallel matrix is partitioned across processors such that the
4423   first `m0` rows belong to process 0, the next `m1` rows belong to
4424   process 1, the next `m2` rows belong to process 2, etc., where
4425   `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores
4426   values corresponding to [m x N] submatrix.
4427 
4428   The columns are logically partitioned with the n0 columns belonging
4429   to 0th partition, the next n1 columns belonging to the next
4430   partition etc.. where n0,n1,n2... are the input parameter 'n'.
4431 
4432   The DIAGONAL portion of the local submatrix on any given processor
4433   is the submatrix corresponding to the rows and columns m,n
4434   corresponding to the given processor. i.e diagonal matrix on
4435   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4436   etc. The remaining portion of the local submatrix [m x (N-n)]
4437   constitute the OFF-DIAGONAL portion. The example below better
4438   illustrates this concept.
4439 
4440   For a square global matrix we define each processor's diagonal portion
4441   to be its local rows and the corresponding columns (a square submatrix);
4442   each processor's off-diagonal portion encompasses the remainder of the
4443   local matrix (a rectangular submatrix).
4444 
4445   If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
4446 
4447   When calling this routine with a single process communicator, a matrix of
4448   type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4449   type of communicator, use the construction mechanism
4450 .vb
4451   MatCreate(..., &A);
4452   MatSetType(A, MATMPIAIJ);
4453   MatSetSizes(A, m, n, M, N);
4454   MatMPIAIJSetPreallocation(A, ...);
4455 .ve
4456 
4457   By default, this format uses inodes (identical nodes) when possible.
4458   We search for consecutive rows with the same nonzero structure, thereby
4459   reusing matrix information to achieve increased efficiency.
4460 
4461   Example Usage:
4462   Consider the following 8x8 matrix with 34 non-zero values, that is
4463   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4464   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4465   as follows
4466 
4467 .vb
4468             1  2  0  |  0  3  0  |  0  4
4469     Proc0   0  5  6  |  7  0  0  |  8  0
4470             9  0 10  | 11  0  0  | 12  0
4471     -------------------------------------
4472            13  0 14  | 15 16 17  |  0  0
4473     Proc1   0 18  0  | 19 20 21  |  0  0
4474             0  0  0  | 22 23  0  | 24  0
4475     -------------------------------------
4476     Proc2  25 26 27  |  0  0 28  | 29  0
4477            30  0  0  | 31 32 33  |  0 34
4478 .ve
4479 
4480   This can be represented as a collection of submatrices as
4481 
4482 .vb
4483       A B C
4484       D E F
4485       G H I
4486 .ve
4487 
4488   Where the submatrices A,B,C are owned by proc0, D,E,F are
4489   owned by proc1, G,H,I are owned by proc2.
4490 
4491   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4492   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4493   The 'M','N' parameters are 8,8, and have the same values on all procs.
4494 
4495   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4496   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4497   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4498   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4499   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4500   matrix, and [DF] as another `MATSEQAIJ` matrix.
4501 
4502   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4503   allocated for every row of the local diagonal submatrix, and `o_nz`
4504   storage locations are allocated for every row of the OFF-DIAGONAL submat.
4505   One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
4506   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4507   In this case, the values of `d_nz`,`o_nz` are
4508 .vb
4509      proc0  d_nz = 2, o_nz = 2
4510      proc1  d_nz = 3, o_nz = 2
4511      proc2  d_nz = 1, o_nz = 4
4512 .ve
4513   We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
4514   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4515   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4516   34 values.
4517 
4518   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4519   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4520   In the above case the values for d_nnz,o_nnz are
4521 .vb
4522      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4523      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4524      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4525 .ve
4526   Here the space allocated is sum of all the above values i.e 34, and
4527   hence pre-allocation is perfect.
4528 
4529 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4530           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`,
4531           `MatGetOwnershipRangesColumn()`, `PetscLayout`
4532 @*/
4533 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4534 {
4535   PetscMPIInt size;
4536 
4537   PetscFunctionBegin;
4538   PetscCall(MatCreate(comm, A));
4539   PetscCall(MatSetSizes(*A, m, n, M, N));
4540   PetscCallMPI(MPI_Comm_size(comm, &size));
4541   if (size > 1) {
4542     PetscCall(MatSetType(*A, MATMPIAIJ));
4543     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4544   } else {
4545     PetscCall(MatSetType(*A, MATSEQAIJ));
4546     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4547   }
4548   PetscFunctionReturn(PETSC_SUCCESS);
4549 }
4550 
4551 /*MC
4552     MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix
4553 
4554     Synopsis:
4555     MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4556 
4557     Not Collective
4558 
4559     Input Parameter:
4560 .   A - the `MATMPIAIJ` matrix
4561 
4562     Output Parameters:
4563 +   Ad - the diagonal portion of the matrix
4564 .   Ao - the off-diagonal portion of the matrix
4565 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4566 -   ierr - error code
4567 
4568      Level: advanced
4569 
4570     Note:
4571     Use  `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4572 
4573 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
4574 M*/
4575 
4576 /*MC
4577     MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4578 
4579     Synopsis:
4580     MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4581 
4582     Not Collective
4583 
4584     Input Parameters:
4585 +   A - the `MATMPIAIJ` matrix
4586 .   Ad - the diagonal portion of the matrix
4587 .   Ao - the off-diagonal portion of the matrix
4588 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4589 -   ierr - error code
4590 
4591      Level: advanced
4592 
4593 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
4594 M*/
4595 
4596 /*@C
4597   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4598 
4599   Not Collective
4600 
4601   Input Parameter:
4602 . A - The `MATMPIAIJ` matrix
4603 
4604   Output Parameters:
4605 + Ad     - The local diagonal block as a `MATSEQAIJ` matrix
4606 . Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
4607 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4608 
4609   Level: intermediate
4610 
4611   Note:
4612   The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
4613   in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns of `Ao` are in [0, Nco), where Nco is
4614   the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
4615   local column numbers to global column numbers in the original matrix.
4616 
4617   Fortran Notes:
4618   `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()`
4619 
4620 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
4621 @*/
4622 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4623 {
4624   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4625   PetscBool   flg;
4626 
4627   PetscFunctionBegin;
4628   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4629   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4630   if (Ad) *Ad = a->A;
4631   if (Ao) *Ao = a->B;
4632   if (colmap) *colmap = a->garray;
4633   PetscFunctionReturn(PETSC_SUCCESS);
4634 }
4635 
4636 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
4637 {
4638   PetscInt     m, N, i, rstart, nnz, Ii;
4639   PetscInt    *indx;
4640   PetscScalar *values;
4641   MatType      rootType;
4642 
4643   PetscFunctionBegin;
4644   PetscCall(MatGetSize(inmat, &m, &N));
4645   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4646     PetscInt *dnz, *onz, sum, bs, cbs;
4647 
4648     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
4649     /* Check sum(n) = N */
4650     PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
4651     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
4652 
4653     PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
4654     rstart -= m;
4655 
4656     MatPreallocateBegin(comm, m, n, dnz, onz);
4657     for (i = 0; i < m; i++) {
4658       PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4659       PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
4660       PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4661     }
4662 
4663     PetscCall(MatCreate(comm, outmat));
4664     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
4665     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
4666     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
4667     PetscCall(MatGetRootType_Private(inmat, &rootType));
4668     PetscCall(MatSetType(*outmat, rootType));
4669     PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
4670     PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
4671     MatPreallocateEnd(dnz, onz);
4672     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
4673   }
4674 
4675   /* numeric phase */
4676   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
4677   for (i = 0; i < m; i++) {
4678     PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4679     Ii = i + rstart;
4680     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
4681     PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4682   }
4683   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
4684   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
4685   PetscFunctionReturn(PETSC_SUCCESS);
4686 }
4687 
4688 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4689 {
4690   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4691 
4692   PetscFunctionBegin;
4693   if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
4694   PetscCall(PetscFree(merge->id_r));
4695   PetscCall(PetscFree(merge->len_s));
4696   PetscCall(PetscFree(merge->len_r));
4697   PetscCall(PetscFree(merge->bi));
4698   PetscCall(PetscFree(merge->bj));
4699   PetscCall(PetscFree(merge->buf_ri[0]));
4700   PetscCall(PetscFree(merge->buf_ri));
4701   PetscCall(PetscFree(merge->buf_rj[0]));
4702   PetscCall(PetscFree(merge->buf_rj));
4703   PetscCall(PetscFree(merge->coi));
4704   PetscCall(PetscFree(merge->coj));
4705   PetscCall(PetscFree(merge->owners_co));
4706   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4707   PetscCall(PetscFree(merge));
4708   PetscFunctionReturn(PETSC_SUCCESS);
4709 }
4710 
4711 #include <../src/mat/utils/freespace.h>
4712 #include <petscbt.h>
4713 
4714 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
4715 {
4716   MPI_Comm             comm;
4717   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4718   PetscMPIInt          size, rank, taga, *len_s;
4719   PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj;
4720   PetscInt             proc, m;
4721   PetscInt           **buf_ri, **buf_rj;
4722   PetscInt             k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
4723   PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
4724   MPI_Request         *s_waits, *r_waits;
4725   MPI_Status          *status;
4726   const MatScalar     *aa, *a_a;
4727   MatScalar          **abuf_r, *ba_i;
4728   Mat_Merge_SeqsToMPI *merge;
4729   PetscContainer       container;
4730 
4731   PetscFunctionBegin;
4732   PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
4733   PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));
4734 
4735   PetscCallMPI(MPI_Comm_size(comm, &size));
4736   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4737 
4738   PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
4739   PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4740   PetscCall(PetscContainerGetPointer(container, (void **)&merge));
4741   PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
4742   aa = a_a;
4743 
4744   bi     = merge->bi;
4745   bj     = merge->bj;
4746   buf_ri = merge->buf_ri;
4747   buf_rj = merge->buf_rj;
4748 
4749   PetscCall(PetscMalloc1(size, &status));
4750   owners = merge->rowmap->range;
4751   len_s  = merge->len_s;
4752 
4753   /* send and recv matrix values */
4754   PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
4755   PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));
4756 
4757   PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
4758   for (proc = 0, k = 0; proc < size; proc++) {
4759     if (!len_s[proc]) continue;
4760     i = owners[proc];
4761     PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
4762     k++;
4763   }
4764 
4765   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
4766   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
4767   PetscCall(PetscFree(status));
4768 
4769   PetscCall(PetscFree(s_waits));
4770   PetscCall(PetscFree(r_waits));
4771 
4772   /* insert mat values of mpimat */
4773   PetscCall(PetscMalloc1(N, &ba_i));
4774   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4775 
4776   for (k = 0; k < merge->nrecv; k++) {
4777     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4778     nrows       = *buf_ri_k[k];
4779     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4780     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4781   }
4782 
4783   /* set values of ba */
4784   m = merge->rowmap->n;
4785   for (i = 0; i < m; i++) {
4786     arow = owners[rank] + i;
4787     bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
4788     bnzi = bi[i + 1] - bi[i];
4789     PetscCall(PetscArrayzero(ba_i, bnzi));
4790 
4791     /* add local non-zero vals of this proc's seqmat into ba */
4792     anzi   = ai[arow + 1] - ai[arow];
4793     aj     = a->j + ai[arow];
4794     aa     = a_a + ai[arow];
4795     nextaj = 0;
4796     for (j = 0; nextaj < anzi; j++) {
4797       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4798         ba_i[j] += aa[nextaj++];
4799       }
4800     }
4801 
4802     /* add received vals into ba */
4803     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4804       /* i-th row */
4805       if (i == *nextrow[k]) {
4806         anzi   = *(nextai[k] + 1) - *nextai[k];
4807         aj     = buf_rj[k] + *nextai[k];
4808         aa     = abuf_r[k] + *nextai[k];
4809         nextaj = 0;
4810         for (j = 0; nextaj < anzi; j++) {
4811           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4812             ba_i[j] += aa[nextaj++];
4813           }
4814         }
4815         nextrow[k]++;
4816         nextai[k]++;
4817       }
4818     }
4819     PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
4820   }
4821   PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
4822   PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
4823   PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));
4824 
4825   PetscCall(PetscFree(abuf_r[0]));
4826   PetscCall(PetscFree(abuf_r));
4827   PetscCall(PetscFree(ba_i));
4828   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
4829   PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
4830   PetscFunctionReturn(PETSC_SUCCESS);
4831 }
4832 
4833 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4834 {
4835   Mat                  B_mpi;
4836   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4837   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4838   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4839   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4840   PetscInt             len, proc, *dnz, *onz, bs, cbs;
4841   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4842   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4843   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4844   MPI_Status          *status;
4845   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4846   PetscBT              lnkbt;
4847   Mat_Merge_SeqsToMPI *merge;
4848   PetscContainer       container;
4849 
4850   PetscFunctionBegin;
4851   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4852 
4853   /* make sure it is a PETSc comm */
4854   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4855   PetscCallMPI(MPI_Comm_size(comm, &size));
4856   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4857 
4858   PetscCall(PetscNew(&merge));
4859   PetscCall(PetscMalloc1(size, &status));
4860 
4861   /* determine row ownership */
4862   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4863   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4864   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4865   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4866   PetscCall(PetscLayoutSetUp(merge->rowmap));
4867   PetscCall(PetscMalloc1(size, &len_si));
4868   PetscCall(PetscMalloc1(size, &merge->len_s));
4869 
4870   m      = merge->rowmap->n;
4871   owners = merge->rowmap->range;
4872 
4873   /* determine the number of messages to send, their lengths */
4874   len_s = merge->len_s;
4875 
4876   len          = 0; /* length of buf_si[] */
4877   merge->nsend = 0;
4878   for (proc = 0; proc < size; proc++) {
4879     len_si[proc] = 0;
4880     if (proc == rank) {
4881       len_s[proc] = 0;
4882     } else {
4883       len_si[proc] = owners[proc + 1] - owners[proc] + 1;
4884       len_s[proc]  = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4885     }
4886     if (len_s[proc]) {
4887       merge->nsend++;
4888       nrows = 0;
4889       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4890         if (ai[i + 1] > ai[i]) nrows++;
4891       }
4892       len_si[proc] = 2 * (nrows + 1);
4893       len += len_si[proc];
4894     }
4895   }
4896 
4897   /* determine the number and length of messages to receive for ij-structure */
4898   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4899   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4900 
4901   /* post the Irecv of j-structure */
4902   PetscCall(PetscCommGetNewTag(comm, &tagj));
4903   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4904 
4905   /* post the Isend of j-structure */
4906   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4907 
4908   for (proc = 0, k = 0; proc < size; proc++) {
4909     if (!len_s[proc]) continue;
4910     i = owners[proc];
4911     PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4912     k++;
4913   }
4914 
4915   /* receives and sends of j-structure are complete */
4916   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4917   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4918 
4919   /* send and recv i-structure */
4920   PetscCall(PetscCommGetNewTag(comm, &tagi));
4921   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4922 
4923   PetscCall(PetscMalloc1(len + 1, &buf_s));
4924   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4925   for (proc = 0, k = 0; proc < size; proc++) {
4926     if (!len_s[proc]) continue;
4927     /* form outgoing message for i-structure:
4928          buf_si[0]:                 nrows to be sent
4929                [1:nrows]:           row index (global)
4930                [nrows+1:2*nrows+1]: i-structure index
4931     */
4932     nrows       = len_si[proc] / 2 - 1;
4933     buf_si_i    = buf_si + nrows + 1;
4934     buf_si[0]   = nrows;
4935     buf_si_i[0] = 0;
4936     nrows       = 0;
4937     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4938       anzi = ai[i + 1] - ai[i];
4939       if (anzi) {
4940         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4941         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4942         nrows++;
4943       }
4944     }
4945     PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4946     k++;
4947     buf_si += len_si[proc];
4948   }
4949 
4950   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4951   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4952 
4953   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4954   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4955 
4956   PetscCall(PetscFree(len_si));
4957   PetscCall(PetscFree(len_ri));
4958   PetscCall(PetscFree(rj_waits));
4959   PetscCall(PetscFree2(si_waits, sj_waits));
4960   PetscCall(PetscFree(ri_waits));
4961   PetscCall(PetscFree(buf_s));
4962   PetscCall(PetscFree(status));
4963 
4964   /* compute a local seq matrix in each processor */
4965   /* allocate bi array and free space for accumulating nonzero column info */
4966   PetscCall(PetscMalloc1(m + 1, &bi));
4967   bi[0] = 0;
4968 
4969   /* create and initialize a linked list */
4970   nlnk = N + 1;
4971   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
4972 
4973   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4974   len = ai[owners[rank + 1]] - ai[owners[rank]];
4975   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
4976 
4977   current_space = free_space;
4978 
4979   /* determine symbolic info for each local row */
4980   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4981 
4982   for (k = 0; k < merge->nrecv; k++) {
4983     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4984     nrows       = *buf_ri_k[k];
4985     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4986     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4987   }
4988 
4989   MatPreallocateBegin(comm, m, n, dnz, onz);
4990   len = 0;
4991   for (i = 0; i < m; i++) {
4992     bnzi = 0;
4993     /* add local non-zero cols of this proc's seqmat into lnk */
4994     arow = owners[rank] + i;
4995     anzi = ai[arow + 1] - ai[arow];
4996     aj   = a->j + ai[arow];
4997     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
4998     bnzi += nlnk;
4999     /* add received col data into lnk */
5000     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
5001       if (i == *nextrow[k]) {            /* i-th row */
5002         anzi = *(nextai[k] + 1) - *nextai[k];
5003         aj   = buf_rj[k] + *nextai[k];
5004         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5005         bnzi += nlnk;
5006         nextrow[k]++;
5007         nextai[k]++;
5008       }
5009     }
5010     if (len < bnzi) len = bnzi; /* =max(bnzi) */
5011 
5012     /* if free space is not available, make more free space */
5013     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
5014     /* copy data into free space, then initialize lnk */
5015     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
5016     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
5017 
5018     current_space->array += bnzi;
5019     current_space->local_used += bnzi;
5020     current_space->local_remaining -= bnzi;
5021 
5022     bi[i + 1] = bi[i] + bnzi;
5023   }
5024 
5025   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
5026 
5027   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
5028   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
5029   PetscCall(PetscLLDestroy(lnk, lnkbt));
5030 
5031   /* create symbolic parallel matrix B_mpi */
5032   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
5033   PetscCall(MatCreate(comm, &B_mpi));
5034   if (n == PETSC_DECIDE) {
5035     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
5036   } else {
5037     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
5038   }
5039   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
5040   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
5041   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5042   MatPreallocateEnd(dnz, onz);
5043   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
5044 
5045   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5046   B_mpi->assembled = PETSC_FALSE;
5047   merge->bi        = bi;
5048   merge->bj        = bj;
5049   merge->buf_ri    = buf_ri;
5050   merge->buf_rj    = buf_rj;
5051   merge->coi       = NULL;
5052   merge->coj       = NULL;
5053   merge->owners_co = NULL;
5054 
5055   PetscCall(PetscCommDestroy(&comm));
5056 
5057   /* attach the supporting struct to B_mpi for reuse */
5058   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5059   PetscCall(PetscContainerSetPointer(container, merge));
5060   PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5061   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5062   PetscCall(PetscContainerDestroy(&container));
5063   *mpimat = B_mpi;
5064 
5065   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5066   PetscFunctionReturn(PETSC_SUCCESS);
5067 }
5068 
5069 /*@C
5070   MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5071   matrices from each processor
5072 
5073   Collective
5074 
5075   Input Parameters:
5076 + comm   - the communicators the parallel matrix will live on
5077 . seqmat - the input sequential matrices
5078 . m      - number of local rows (or `PETSC_DECIDE`)
5079 . n      - number of local columns (or `PETSC_DECIDE`)
5080 - scall  - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5081 
5082   Output Parameter:
5083 . mpimat - the parallel matrix generated
5084 
5085   Level: advanced
5086 
5087   Note:
5088   The dimensions of the sequential matrix in each processor MUST be the same.
5089   The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5090   destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`.
5091 
5092 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
5093 @*/
5094 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5095 {
5096   PetscMPIInt size;
5097 
5098   PetscFunctionBegin;
5099   PetscCallMPI(MPI_Comm_size(comm, &size));
5100   if (size == 1) {
5101     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5102     if (scall == MAT_INITIAL_MATRIX) {
5103       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5104     } else {
5105       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5106     }
5107     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5108     PetscFunctionReturn(PETSC_SUCCESS);
5109   }
5110   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5111   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5112   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5113   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5114   PetscFunctionReturn(PETSC_SUCCESS);
5115 }
5116 
5117 /*@
5118   MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix.
5119 
5120   Not Collective
5121 
5122   Input Parameter:
5123 . A - the matrix
5124 
5125   Output Parameter:
5126 . A_loc - the local sequential matrix generated
5127 
5128   Level: developer
5129 
5130   Notes:
5131   The matrix is created by taking `A`'s local rows and putting them into a sequential matrix
5132   with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and
5133   `n` is the global column count obtained with `MatGetSize()`
5134 
5135   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5136 
5137   For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.
5138 
5139   Destroy the matrix with `MatDestroy()`
5140 
5141 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5142 @*/
5143 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5144 {
5145   PetscBool mpi;
5146 
5147   PetscFunctionBegin;
5148   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5149   if (mpi) {
5150     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5151   } else {
5152     *A_loc = A;
5153     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5154   }
5155   PetscFunctionReturn(PETSC_SUCCESS);
5156 }
5157 
5158 /*@
5159   MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.
5160 
5161   Not Collective
5162 
5163   Input Parameters:
5164 + A     - the matrix
5165 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5166 
5167   Output Parameter:
5168 . A_loc - the local sequential matrix generated
5169 
5170   Level: developer
5171 
5172   Notes:
5173   The matrix is created by taking all `A`'s local rows and putting them into a sequential
5174   matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with
5175   `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.
5176 
5177   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5178 
5179   When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
5180   with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
5181   then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
5182   and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.
5183 
5184 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5185 @*/
5186 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
5187 {
5188   Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
5189   Mat_SeqAIJ        *mat, *a, *b;
5190   PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
5191   const PetscScalar *aa, *ba, *aav, *bav;
5192   PetscScalar       *ca, *cam;
5193   PetscMPIInt        size;
5194   PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
5195   PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
5196   PetscBool          match;
5197 
5198   PetscFunctionBegin;
5199   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
5200   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5201   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5202   if (size == 1) {
5203     if (scall == MAT_INITIAL_MATRIX) {
5204       PetscCall(PetscObjectReference((PetscObject)mpimat->A));
5205       *A_loc = mpimat->A;
5206     } else if (scall == MAT_REUSE_MATRIX) {
5207       PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
5208     }
5209     PetscFunctionReturn(PETSC_SUCCESS);
5210   }
5211 
5212   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5213   a  = (Mat_SeqAIJ *)mpimat->A->data;
5214   b  = (Mat_SeqAIJ *)mpimat->B->data;
5215   ai = a->i;
5216   aj = a->j;
5217   bi = b->i;
5218   bj = b->j;
5219   PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
5220   PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
5221   aa = aav;
5222   ba = bav;
5223   if (scall == MAT_INITIAL_MATRIX) {
5224     PetscCall(PetscMalloc1(1 + am, &ci));
5225     ci[0] = 0;
5226     for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
5227     PetscCall(PetscMalloc1(1 + ci[am], &cj));
5228     PetscCall(PetscMalloc1(1 + ci[am], &ca));
5229     k = 0;
5230     for (i = 0; i < am; i++) {
5231       ncols_o = bi[i + 1] - bi[i];
5232       ncols_d = ai[i + 1] - ai[i];
5233       /* off-diagonal portion of A */
5234       for (jo = 0; jo < ncols_o; jo++) {
5235         col = cmap[*bj];
5236         if (col >= cstart) break;
5237         cj[k] = col;
5238         bj++;
5239         ca[k++] = *ba++;
5240       }
5241       /* diagonal portion of A */
5242       for (j = 0; j < ncols_d; j++) {
5243         cj[k]   = cstart + *aj++;
5244         ca[k++] = *aa++;
5245       }
5246       /* off-diagonal portion of A */
5247       for (j = jo; j < ncols_o; j++) {
5248         cj[k]   = cmap[*bj++];
5249         ca[k++] = *ba++;
5250       }
5251     }
5252     /* put together the new matrix */
5253     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
5254     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5255     /* Since these are PETSc arrays, change flags to free them as necessary. */
5256     mat          = (Mat_SeqAIJ *)(*A_loc)->data;
5257     mat->free_a  = PETSC_TRUE;
5258     mat->free_ij = PETSC_TRUE;
5259     mat->nonew   = 0;
5260   } else if (scall == MAT_REUSE_MATRIX) {
5261     mat = (Mat_SeqAIJ *)(*A_loc)->data;
5262     ci  = mat->i;
5263     cj  = mat->j;
5264     PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
5265     for (i = 0; i < am; i++) {
5266       /* off-diagonal portion of A */
5267       ncols_o = bi[i + 1] - bi[i];
5268       for (jo = 0; jo < ncols_o; jo++) {
5269         col = cmap[*bj];
5270         if (col >= cstart) break;
5271         *cam++ = *ba++;
5272         bj++;
5273       }
5274       /* diagonal portion of A */
5275       ncols_d = ai[i + 1] - ai[i];
5276       for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
5277       /* off-diagonal portion of A */
5278       for (j = jo; j < ncols_o; j++) {
5279         *cam++ = *ba++;
5280         bj++;
5281       }
5282     }
5283     PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
5284   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5285   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
5286   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
5287   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5288   PetscFunctionReturn(PETSC_SUCCESS);
5289 }
5290 
5291 /*@
5292   MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5293   mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part
5294 
5295   Not Collective
5296 
5297   Input Parameters:
5298 + A     - the matrix
5299 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5300 
5301   Output Parameters:
5302 + glob  - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5303 - A_loc - the local sequential matrix generated
5304 
5305   Level: developer
5306 
5307   Note:
5308   This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal
5309   part, then those associated with the off-diagonal part (in its local ordering)
5310 
5311 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5312 @*/
5313 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5314 {
5315   Mat             Ao, Ad;
5316   const PetscInt *cmap;
5317   PetscMPIInt     size;
5318   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5319 
5320   PetscFunctionBegin;
5321   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5322   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5323   if (size == 1) {
5324     if (scall == MAT_INITIAL_MATRIX) {
5325       PetscCall(PetscObjectReference((PetscObject)Ad));
5326       *A_loc = Ad;
5327     } else if (scall == MAT_REUSE_MATRIX) {
5328       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5329     }
5330     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5331     PetscFunctionReturn(PETSC_SUCCESS);
5332   }
5333   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5334   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5335   if (f) {
5336     PetscCall((*f)(A, scall, glob, A_loc));
5337   } else {
5338     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5339     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5340     Mat_SeqAIJ        *c;
5341     PetscInt          *ai = a->i, *aj = a->j;
5342     PetscInt          *bi = b->i, *bj = b->j;
5343     PetscInt          *ci, *cj;
5344     const PetscScalar *aa, *ba;
5345     PetscScalar       *ca;
5346     PetscInt           i, j, am, dn, on;
5347 
5348     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5349     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5350     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5351     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5352     if (scall == MAT_INITIAL_MATRIX) {
5353       PetscInt k;
5354       PetscCall(PetscMalloc1(1 + am, &ci));
5355       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5356       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5357       ci[0] = 0;
5358       for (i = 0, k = 0; i < am; i++) {
5359         const PetscInt ncols_o = bi[i + 1] - bi[i];
5360         const PetscInt ncols_d = ai[i + 1] - ai[i];
5361         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5362         /* diagonal portion of A */
5363         for (j = 0; j < ncols_d; j++, k++) {
5364           cj[k] = *aj++;
5365           ca[k] = *aa++;
5366         }
5367         /* off-diagonal portion of A */
5368         for (j = 0; j < ncols_o; j++, k++) {
5369           cj[k] = dn + *bj++;
5370           ca[k] = *ba++;
5371         }
5372       }
5373       /* put together the new matrix */
5374       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5375       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5376       /* Since these are PETSc arrays, change flags to free them as necessary. */
5377       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5378       c->free_a  = PETSC_TRUE;
5379       c->free_ij = PETSC_TRUE;
5380       c->nonew   = 0;
5381       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5382     } else if (scall == MAT_REUSE_MATRIX) {
5383       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5384       for (i = 0; i < am; i++) {
5385         const PetscInt ncols_d = ai[i + 1] - ai[i];
5386         const PetscInt ncols_o = bi[i + 1] - bi[i];
5387         /* diagonal portion of A */
5388         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5389         /* off-diagonal portion of A */
5390         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5391       }
5392       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5393     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5394     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5395     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa));
5396     if (glob) {
5397       PetscInt cst, *gidx;
5398 
5399       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5400       PetscCall(PetscMalloc1(dn + on, &gidx));
5401       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5402       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5403       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5404     }
5405   }
5406   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5407   PetscFunctionReturn(PETSC_SUCCESS);
5408 }
5409 
5410 /*@C
5411   MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5412 
5413   Not Collective
5414 
5415   Input Parameters:
5416 + A     - the matrix
5417 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5418 . row   - index set of rows to extract (or `NULL`)
5419 - col   - index set of columns to extract (or `NULL`)
5420 
5421   Output Parameter:
5422 . A_loc - the local sequential matrix generated
5423 
5424   Level: developer
5425 
5426 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5427 @*/
5428 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
5429 {
5430   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5431   PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
5432   IS          isrowa, iscola;
5433   Mat        *aloc;
5434   PetscBool   match;
5435 
5436   PetscFunctionBegin;
5437   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
5438   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5439   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5440   if (!row) {
5441     start = A->rmap->rstart;
5442     end   = A->rmap->rend;
5443     PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
5444   } else {
5445     isrowa = *row;
5446   }
5447   if (!col) {
5448     start = A->cmap->rstart;
5449     cmap  = a->garray;
5450     nzA   = a->A->cmap->n;
5451     nzB   = a->B->cmap->n;
5452     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5453     ncols = 0;
5454     for (i = 0; i < nzB; i++) {
5455       if (cmap[i] < start) idx[ncols++] = cmap[i];
5456       else break;
5457     }
5458     imark = i;
5459     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
5460     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
5461     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
5462   } else {
5463     iscola = *col;
5464   }
5465   if (scall != MAT_INITIAL_MATRIX) {
5466     PetscCall(PetscMalloc1(1, &aloc));
5467     aloc[0] = *A_loc;
5468   }
5469   PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
5470   if (!col) { /* attach global id of condensed columns */
5471     PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
5472   }
5473   *A_loc = aloc[0];
5474   PetscCall(PetscFree(aloc));
5475   if (!row) PetscCall(ISDestroy(&isrowa));
5476   if (!col) PetscCall(ISDestroy(&iscola));
5477   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5478   PetscFunctionReturn(PETSC_SUCCESS);
5479 }
5480 
5481 /*
5482  * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched.
5483  * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based
5484  * on a global size.
5485  * */
5486 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
5487 {
5488   Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
5489   Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth;
5490   PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
5491   PetscMPIInt            owner;
5492   PetscSFNode           *iremote, *oiremote;
5493   const PetscInt        *lrowindices;
5494   PetscSF                sf, osf;
5495   PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
5496   PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
5497   MPI_Comm               comm;
5498   ISLocalToGlobalMapping mapping;
5499   const PetscScalar     *pd_a, *po_a;
5500 
5501   PetscFunctionBegin;
5502   PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
5503   /* plocalsize is the number of roots
5504    * nrows is the number of leaves
5505    * */
5506   PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
5507   PetscCall(ISGetLocalSize(rows, &nrows));
5508   PetscCall(PetscCalloc1(nrows, &iremote));
5509   PetscCall(ISGetIndices(rows, &lrowindices));
5510   for (i = 0; i < nrows; i++) {
5511     /* Find a remote index and an owner for a row
5512      * The row could be local or remote
5513      * */
5514     owner = 0;
5515     lidx  = 0;
5516     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
5517     iremote[i].index = lidx;
5518     iremote[i].rank  = owner;
5519   }
5520   /* Create SF to communicate how many nonzero columns for each row */
5521   PetscCall(PetscSFCreate(comm, &sf));
5522   /* SF will figure out the number of nonzero columns for each row, and their
5523    * offsets
5524    * */
5525   PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5526   PetscCall(PetscSFSetFromOptions(sf));
5527   PetscCall(PetscSFSetUp(sf));
5528 
5529   PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
5530   PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
5531   PetscCall(PetscCalloc1(nrows, &pnnz));
5532   roffsets[0] = 0;
5533   roffsets[1] = 0;
5534   for (i = 0; i < plocalsize; i++) {
5535     /* diagonal */
5536     nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
5537     /* off-diagonal */
5538     nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
5539     /* compute offsets so that we relative location for each row */
5540     roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
5541     roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
5542   }
5543   PetscCall(PetscCalloc1(2 * nrows, &nlcols));
5544   PetscCall(PetscCalloc1(2 * nrows, &loffsets));
5545   /* 'r' means root, and 'l' means leaf */
5546   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5547   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5548   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5549   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5550   PetscCall(PetscSFDestroy(&sf));
5551   PetscCall(PetscFree(roffsets));
5552   PetscCall(PetscFree(nrcols));
5553   dntotalcols = 0;
5554   ontotalcols = 0;
5555   ncol        = 0;
5556   for (i = 0; i < nrows; i++) {
5557     pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
5558     ncol    = PetscMax(pnnz[i], ncol);
5559     /* diagonal */
5560     dntotalcols += nlcols[i * 2 + 0];
5561     /* off-diagonal */
5562     ontotalcols += nlcols[i * 2 + 1];
5563   }
5564   /* We do not need to figure the right number of columns
5565    * since all the calculations will be done by going through the raw data
5566    * */
5567   PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
5568   PetscCall(MatSetUp(*P_oth));
5569   PetscCall(PetscFree(pnnz));
5570   p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5571   /* diagonal */
5572   PetscCall(PetscCalloc1(dntotalcols, &iremote));
5573   /* off-diagonal */
5574   PetscCall(PetscCalloc1(ontotalcols, &oiremote));
5575   /* diagonal */
5576   PetscCall(PetscCalloc1(dntotalcols, &ilocal));
5577   /* off-diagonal */
5578   PetscCall(PetscCalloc1(ontotalcols, &oilocal));
5579   dntotalcols = 0;
5580   ontotalcols = 0;
5581   ntotalcols  = 0;
5582   for (i = 0; i < nrows; i++) {
5583     owner = 0;
5584     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
5585     /* Set iremote for diag matrix */
5586     for (j = 0; j < nlcols[i * 2 + 0]; j++) {
5587       iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
5588       iremote[dntotalcols].rank  = owner;
5589       /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
5590       ilocal[dntotalcols++] = ntotalcols++;
5591     }
5592     /* off-diagonal */
5593     for (j = 0; j < nlcols[i * 2 + 1]; j++) {
5594       oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
5595       oiremote[ontotalcols].rank  = owner;
5596       oilocal[ontotalcols++]      = ntotalcols++;
5597     }
5598   }
5599   PetscCall(ISRestoreIndices(rows, &lrowindices));
5600   PetscCall(PetscFree(loffsets));
5601   PetscCall(PetscFree(nlcols));
5602   PetscCall(PetscSFCreate(comm, &sf));
5603   /* P serves as roots and P_oth is leaves
5604    * Diag matrix
5605    * */
5606   PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5607   PetscCall(PetscSFSetFromOptions(sf));
5608   PetscCall(PetscSFSetUp(sf));
5609 
5610   PetscCall(PetscSFCreate(comm, &osf));
5611   /* off-diagonal */
5612   PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
5613   PetscCall(PetscSFSetFromOptions(osf));
5614   PetscCall(PetscSFSetUp(osf));
5615   PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5616   PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5617   /* operate on the matrix internal data to save memory */
5618   PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5619   PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5620   PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
5621   /* Convert to global indices for diag matrix */
5622   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
5623   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5624   /* We want P_oth store global indices */
5625   PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
5626   /* Use memory scalable approach */
5627   PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
5628   PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
5629   PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5630   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5631   /* Convert back to local indices */
5632   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
5633   PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5634   nout = 0;
5635   PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
5636   PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
5637   PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
5638   /* Exchange values */
5639   PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5640   PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5641   PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5642   PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5643   /* Stop PETSc from shrinking memory */
5644   for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
5645   PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
5646   PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
5647   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5648   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
5649   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
5650   PetscCall(PetscSFDestroy(&sf));
5651   PetscCall(PetscSFDestroy(&osf));
5652   PetscFunctionReturn(PETSC_SUCCESS);
5653 }
5654 
5655 /*
5656  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5657  * This supports MPIAIJ and MAIJ
5658  * */
5659 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
5660 {
5661   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
5662   Mat_SeqAIJ *p_oth;
5663   IS          rows, map;
5664   PetscHMapI  hamp;
5665   PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
5666   MPI_Comm    comm;
5667   PetscSF     sf, osf;
5668   PetscBool   has;
5669 
5670   PetscFunctionBegin;
5671   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5672   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
5673   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5674    *  and then create a submatrix (that often is an overlapping matrix)
5675    * */
5676   if (reuse == MAT_INITIAL_MATRIX) {
5677     /* Use a hash table to figure out unique keys */
5678     PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
5679     PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
5680     count = 0;
5681     /* Assume that  a->g is sorted, otherwise the following does not make sense */
5682     for (i = 0; i < a->B->cmap->n; i++) {
5683       key = a->garray[i] / dof;
5684       PetscCall(PetscHMapIHas(hamp, key, &has));
5685       if (!has) {
5686         mapping[i] = count;
5687         PetscCall(PetscHMapISet(hamp, key, count++));
5688       } else {
5689         /* Current 'i' has the same value the previous step */
5690         mapping[i] = count - 1;
5691       }
5692     }
5693     PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
5694     PetscCall(PetscHMapIGetSize(hamp, &htsize));
5695     PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
5696     PetscCall(PetscCalloc1(htsize, &rowindices));
5697     off = 0;
5698     PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
5699     PetscCall(PetscHMapIDestroy(&hamp));
5700     PetscCall(PetscSortInt(htsize, rowindices));
5701     PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
5702     /* In case, the matrix was already created but users want to recreate the matrix */
5703     PetscCall(MatDestroy(P_oth));
5704     PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
5705     PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
5706     PetscCall(ISDestroy(&map));
5707     PetscCall(ISDestroy(&rows));
5708   } else if (reuse == MAT_REUSE_MATRIX) {
5709     /* If matrix was already created, we simply update values using SF objects
5710      * that as attached to the matrix earlier.
5711      */
5712     const PetscScalar *pd_a, *po_a;
5713 
5714     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
5715     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
5716     PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
5717     p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5718     /* Update values in place */
5719     PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5720     PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5721     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5722     PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5723     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5724     PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5725     PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5726     PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5727   } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
5728   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
5729   PetscFunctionReturn(PETSC_SUCCESS);
5730 }
5731 
5732 /*@C
5733   MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A`
5734 
5735   Collective
5736 
5737   Input Parameters:
5738 + A     - the first matrix in `MATMPIAIJ` format
5739 . B     - the second matrix in `MATMPIAIJ` format
5740 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5741 
5742   Output Parameters:
5743 + rowb  - On input index sets of rows of B to extract (or `NULL`), modified on output
5744 . colb  - On input index sets of columns of B to extract (or `NULL`), modified on output
5745 - B_seq - the sequential matrix generated
5746 
5747   Level: developer
5748 
5749 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
5750 @*/
5751 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5752 {
5753   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5754   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5755   IS          isrowb, iscolb;
5756   Mat        *bseq = NULL;
5757 
5758   PetscFunctionBegin;
5759   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5760              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5761   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5762 
5763   if (scall == MAT_INITIAL_MATRIX) {
5764     start = A->cmap->rstart;
5765     cmap  = a->garray;
5766     nzA   = a->A->cmap->n;
5767     nzB   = a->B->cmap->n;
5768     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5769     ncols = 0;
5770     for (i = 0; i < nzB; i++) { /* row < local row index */
5771       if (cmap[i] < start) idx[ncols++] = cmap[i];
5772       else break;
5773     }
5774     imark = i;
5775     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5776     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5777     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5778     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5779   } else {
5780     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5781     isrowb = *rowb;
5782     iscolb = *colb;
5783     PetscCall(PetscMalloc1(1, &bseq));
5784     bseq[0] = *B_seq;
5785   }
5786   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5787   *B_seq = bseq[0];
5788   PetscCall(PetscFree(bseq));
5789   if (!rowb) {
5790     PetscCall(ISDestroy(&isrowb));
5791   } else {
5792     *rowb = isrowb;
5793   }
5794   if (!colb) {
5795     PetscCall(ISDestroy(&iscolb));
5796   } else {
5797     *colb = iscolb;
5798   }
5799   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5800   PetscFunctionReturn(PETSC_SUCCESS);
5801 }
5802 
5803 /*
5804     MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns
5805     of the OFF-DIAGONAL portion of local A
5806 
5807     Collective
5808 
5809    Input Parameters:
5810 +    A,B - the matrices in `MATMPIAIJ` format
5811 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5812 
5813    Output Parameter:
5814 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5815 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5816 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5817 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5818 
5819     Developer Note:
5820     This directly accesses information inside the VecScatter associated with the matrix-vector product
5821      for this matrix. This is not desirable..
5822 
5823     Level: developer
5824 
5825 */
5826 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5827 {
5828   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5829   Mat_SeqAIJ        *b_oth;
5830   VecScatter         ctx;
5831   MPI_Comm           comm;
5832   const PetscMPIInt *rprocs, *sprocs;
5833   const PetscInt    *srow, *rstarts, *sstarts;
5834   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5835   PetscInt           i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len;
5836   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5837   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5838   PetscMPIInt        size, tag, rank, nreqs;
5839 
5840   PetscFunctionBegin;
5841   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5842   PetscCallMPI(MPI_Comm_size(comm, &size));
5843 
5844   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5845              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5846   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5847   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5848 
5849   if (size == 1) {
5850     startsj_s = NULL;
5851     bufa_ptr  = NULL;
5852     *B_oth    = NULL;
5853     PetscFunctionReturn(PETSC_SUCCESS);
5854   }
5855 
5856   ctx = a->Mvctx;
5857   tag = ((PetscObject)ctx)->tag;
5858 
5859   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5860   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5861   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5862   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5863   PetscCall(PetscMalloc1(nreqs, &reqs));
5864   rwaits = reqs;
5865   swaits = PetscSafePointerPlusOffset(reqs, nrecvs);
5866 
5867   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5868   if (scall == MAT_INITIAL_MATRIX) {
5869     /* i-array */
5870     /*  post receives */
5871     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5872     for (i = 0; i < nrecvs; i++) {
5873       rowlen = rvalues + rstarts[i] * rbs;
5874       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5875       PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5876     }
5877 
5878     /* pack the outgoing message */
5879     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5880 
5881     sstartsj[0] = 0;
5882     rstartsj[0] = 0;
5883     len         = 0; /* total length of j or a array to be sent */
5884     if (nsends) {
5885       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5886       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5887     }
5888     for (i = 0; i < nsends; i++) {
5889       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5890       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5891       for (j = 0; j < nrows; j++) {
5892         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5893         for (l = 0; l < sbs; l++) {
5894           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5895 
5896           rowlen[j * sbs + l] = ncols;
5897 
5898           len += ncols;
5899           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5900         }
5901         k++;
5902       }
5903       PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5904 
5905       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5906     }
5907     /* recvs and sends of i-array are completed */
5908     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5909     PetscCall(PetscFree(svalues));
5910 
5911     /* allocate buffers for sending j and a arrays */
5912     PetscCall(PetscMalloc1(len + 1, &bufj));
5913     PetscCall(PetscMalloc1(len + 1, &bufa));
5914 
5915     /* create i-array of B_oth */
5916     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5917 
5918     b_othi[0] = 0;
5919     len       = 0; /* total length of j or a array to be received */
5920     k         = 0;
5921     for (i = 0; i < nrecvs; i++) {
5922       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5923       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5924       for (j = 0; j < nrows; j++) {
5925         b_othi[k + 1] = b_othi[k] + rowlen[j];
5926         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5927         k++;
5928       }
5929       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5930     }
5931     PetscCall(PetscFree(rvalues));
5932 
5933     /* allocate space for j and a arrays of B_oth */
5934     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5935     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5936 
5937     /* j-array */
5938     /*  post receives of j-array */
5939     for (i = 0; i < nrecvs; i++) {
5940       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5941       PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5942     }
5943 
5944     /* pack the outgoing message j-array */
5945     if (nsends) k = sstarts[0];
5946     for (i = 0; i < nsends; i++) {
5947       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5948       bufJ  = bufj + sstartsj[i];
5949       for (j = 0; j < nrows; j++) {
5950         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5951         for (ll = 0; ll < sbs; ll++) {
5952           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5953           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5954           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5955         }
5956       }
5957       PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5958     }
5959 
5960     /* recvs and sends of j-array are completed */
5961     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5962   } else if (scall == MAT_REUSE_MATRIX) {
5963     sstartsj = *startsj_s;
5964     rstartsj = *startsj_r;
5965     bufa     = *bufa_ptr;
5966     b_oth    = (Mat_SeqAIJ *)(*B_oth)->data;
5967     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5968   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5969 
5970   /* a-array */
5971   /*  post receives of a-array */
5972   for (i = 0; i < nrecvs; i++) {
5973     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5974     PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
5975   }
5976 
5977   /* pack the outgoing message a-array */
5978   if (nsends) k = sstarts[0];
5979   for (i = 0; i < nsends; i++) {
5980     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5981     bufA  = bufa + sstartsj[i];
5982     for (j = 0; j < nrows; j++) {
5983       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5984       for (ll = 0; ll < sbs; ll++) {
5985         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5986         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
5987         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5988       }
5989     }
5990     PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
5991   }
5992   /* recvs and sends of a-array are completed */
5993   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5994   PetscCall(PetscFree(reqs));
5995 
5996   if (scall == MAT_INITIAL_MATRIX) {
5997     /* put together the new matrix */
5998     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
5999 
6000     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
6001     /* Since these are PETSc arrays, change flags to free them as necessary. */
6002     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
6003     b_oth->free_a  = PETSC_TRUE;
6004     b_oth->free_ij = PETSC_TRUE;
6005     b_oth->nonew   = 0;
6006 
6007     PetscCall(PetscFree(bufj));
6008     if (!startsj_s || !bufa_ptr) {
6009       PetscCall(PetscFree2(sstartsj, rstartsj));
6010       PetscCall(PetscFree(bufa_ptr));
6011     } else {
6012       *startsj_s = sstartsj;
6013       *startsj_r = rstartsj;
6014       *bufa_ptr  = bufa;
6015     }
6016   } else if (scall == MAT_REUSE_MATRIX) {
6017     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
6018   }
6019 
6020   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
6021   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
6022   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
6023   PetscFunctionReturn(PETSC_SUCCESS);
6024 }
6025 
/*
   Forward declarations of conversion and product-setup routines whose definitions are not in this
   part of the file. Prototypes for optional back ends are only declared when the matching
   PETSC_HAVE_xxx macro is defined.
*/
6026 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
6027 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
6028 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
6029 #if defined(PETSC_HAVE_MKL_SPARSE)
6030 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
6031 #endif
6032 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
6033 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
6034 #if defined(PETSC_HAVE_ELEMENTAL)
6035 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
6036 #endif
6037 #if defined(PETSC_HAVE_SCALAPACK)
6038 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6039 #endif
6040 #if defined(PETSC_HAVE_HYPRE)
6041 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
6042 #endif
6043 #if defined(PETSC_HAVE_CUDA)
6044 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
6045 #endif
6046 #if defined(PETSC_HAVE_HIP)
6047 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
6048 #endif
6049 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6050 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6051 #endif
6052 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6053 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6054 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6055 
6056 /*
6057     Computes (B'*A')' since computing B*A directly is untenable
6058 
6059                n                       p                          p
6060         [             ]       [             ]         [                 ]
6061       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6062         [             ]       [             ]         [                 ]
6063 
6064 */
6065 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6066 {
6067   Mat At, Bt, Ct;
6068 
6069   PetscFunctionBegin;
6070   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6071   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6072   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
6073   PetscCall(MatDestroy(&At));
6074   PetscCall(MatDestroy(&Bt));
6075   PetscCall(MatTransposeSetPrecursor(Ct, C));
6076   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6077   PetscCall(MatDestroy(&Ct));
6078   PetscFunctionReturn(PETSC_SUCCESS);
6079 }
6080 
6081 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6082 {
6083   PetscBool cisdense;
6084 
6085   PetscFunctionBegin;
6086   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6087   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6088   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6089   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6090   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6091   PetscCall(MatSetUp(C));
6092 
6093   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6094   PetscFunctionReturn(PETSC_SUCCESS);
6095 }
6096 
6097 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6098 {
6099   Mat_Product *product = C->product;
6100   Mat          A = product->A, B = product->B;
6101 
6102   PetscFunctionBegin;
6103   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
6104              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6105   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6106   C->ops->productsymbolic = MatProductSymbolic_AB;
6107   PetscFunctionReturn(PETSC_SUCCESS);
6108 }
6109 
6110 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6111 {
6112   Mat_Product *product = C->product;
6113 
6114   PetscFunctionBegin;
6115   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6116   PetscFunctionReturn(PETSC_SUCCESS);
6117 }
6118 
6119 /*
6120    Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6121 
6122   Input Parameters:
6123 
6124     j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
6125     j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)
6126 
6127     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6128 
6129     For Set1, j1[] contains column indices of the nonzeros.
6130     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
6131     respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted,
6132     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6133 
6134     Similar for Set2.
6135 
6136     This routine merges the two sets of nonzeros row by row and removes repeats.
6137 
6138   Output Parameters: (memory is allocated by the caller)
6139 
6140     i[],j[]: the CSR of the merged matrix, which has m rows.
6141     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6142     imap2[]: similar to imap1[], but for Set2.
6143     Note we order nonzeros row-by-row and from left to right.
6144 */
6145 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
6146 {
6147   PetscInt   r, m; /* Row index of mat */
6148   PetscCount t, t1, t2, b1, e1, b2, e2;
6149 
6150   PetscFunctionBegin;
6151   PetscCall(MatGetLocalSize(mat, &m, NULL));
6152   t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */
6153   i[0]        = 0;
6154   for (r = 0; r < m; r++) { /* Do row by row merging */
6155     b1 = rowBegin1[r];
6156     e1 = rowEnd1[r];
6157     b2 = rowBegin2[r];
6158     e2 = rowEnd2[r];
6159     while (b1 < e1 && b2 < e2) {
6160       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6161         j[t]      = j1[b1];
6162         imap1[t1] = t;
6163         imap2[t2] = t;
6164         b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
6165         b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
6166         t1++;
6167         t2++;
6168         t++;
6169       } else if (j1[b1] < j2[b2]) {
6170         j[t]      = j1[b1];
6171         imap1[t1] = t;
6172         b1 += jmap1[t1 + 1] - jmap1[t1];
6173         t1++;
6174         t++;
6175       } else {
6176         j[t]      = j2[b2];
6177         imap2[t2] = t;
6178         b2 += jmap2[t2 + 1] - jmap2[t2];
6179         t2++;
6180         t++;
6181       }
6182     }
6183     /* Merge the remaining in either j1[] or j2[] */
6184     while (b1 < e1) {
6185       j[t]      = j1[b1];
6186       imap1[t1] = t;
6187       b1 += jmap1[t1 + 1] - jmap1[t1];
6188       t1++;
6189       t++;
6190     }
6191     while (b2 < e2) {
6192       j[t]      = j2[b2];
6193       imap2[t2] = t;
6194       b2 += jmap2[t2 + 1] - jmap2[t2];
6195       t2++;
6196       t++;
6197     }
6198     i[r + 1] = t;
6199   }
6200   PetscFunctionReturn(PETSC_SUCCESS);
6201 }
6202 
6203 /*
6204   Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6205 
6206   Input Parameters:
6207     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6208     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6209       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6210 
6211       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6212       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6213 
6214   Output Parameters:
6215     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6216     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6217       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6218       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6219 
6220     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6221       Atot: number of entries belonging to the diagonal block.
6222       Annz: number of unique nonzeros belonging to the diagonal block.
6223       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6224         repeats (i.e., same 'i,j' pair).
6225       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6226         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6227 
6228       Atot: number of entries belonging to the diagonal block
6229       Annz: number of unique nonzeros belonging to the diagonal block.
6230 
6231     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6232 
6233     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6234 */
6235 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
6236 {
6237   PetscInt    cstart, cend, rstart, rend, row, col;
6238   PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6239   PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6240   PetscCount  k, m, p, q, r, s, mid;
6241   PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;
6242 
6243   PetscFunctionBegin;
6244   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6245   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6246   m = rend - rstart;
6247 
6248   /* Skip negative rows */
6249   for (k = 0; k < n; k++)
6250     if (i[k] >= 0) break;
6251 
6252   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6253      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6254   */
6255   while (k < n) {
6256     row = i[k];
6257     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6258     for (s = k; s < n; s++)
6259       if (i[s] != row) break;
6260 
6261     /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */
6262     for (p = k; p < s; p++) {
6263       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT;
6264       else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
6265     }
6266     PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
6267     PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6268     rowBegin[row - rstart] = k;
6269     rowMid[row - rstart]   = mid;
6270     rowEnd[row - rstart]   = s;
6271 
6272     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6273     Atot += mid - k;
6274     Btot += s - mid;
6275 
6276     /* Count unique nonzeros of this diag row */
6277     for (p = k; p < mid;) {
6278       col = j[p];
6279       do {
6280         j[p] += PETSC_MAX_INT; /* Revert the modified diagonal indices */
6281         p++;
6282       } while (p < mid && j[p] == col);
6283       Annz++;
6284     }
6285 
6286     /* Count unique nonzeros of this offdiag row */
6287     for (p = mid; p < s;) {
6288       col = j[p];
6289       do {
6290         p++;
6291       } while (p < s && j[p] == col);
6292       Bnnz++;
6293     }
6294     k = s;
6295   }
6296 
6297   /* Allocation according to Atot, Btot, Annz, Bnnz */
6298   PetscCall(PetscMalloc1(Atot, &Aperm));
6299   PetscCall(PetscMalloc1(Btot, &Bperm));
6300   PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
6301   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));
6302 
6303   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6304   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6305   for (r = 0; r < m; r++) {
6306     k   = rowBegin[r];
6307     mid = rowMid[r];
6308     s   = rowEnd[r];
6309     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
6310     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
6311     Atot += mid - k;
6312     Btot += s - mid;
6313 
6314     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6315     for (p = k; p < mid;) {
6316       col = j[p];
6317       q   = p;
6318       do {
6319         p++;
6320       } while (p < mid && j[p] == col);
6321       Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
6322       Annz++;
6323     }
6324 
6325     for (p = mid; p < s;) {
6326       col = j[p];
6327       q   = p;
6328       do {
6329         p++;
6330       } while (p < s && j[p] == col);
6331       Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
6332       Bnnz++;
6333     }
6334   }
6335   /* Output */
6336   *Aperm_ = Aperm;
6337   *Annz_  = Annz;
6338   *Atot_  = Atot;
6339   *Ajmap_ = Ajmap;
6340   *Bperm_ = Bperm;
6341   *Bnnz_  = Bnnz;
6342   *Btot_  = Btot;
6343   *Bjmap_ = Bjmap;
6344   PetscFunctionReturn(PETSC_SUCCESS);
6345 }
6346 
6347 /*
6348   Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6349 
6350   Input Parameters:
6351     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6352     nnz:  number of unique nonzeros in the merged matrix
6353     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6354     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6355 
6356   Output Parameter: (memory is allocated by the caller)
6357     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6358 
6359   Example:
6360     nnz1 = 4
6361     nnz  = 6
6362     imap = [1,3,4,5]
6363     jmap = [0,3,5,6,7]
6364    then,
6365     jmap_new = [0,0,3,3,5,6,7]
6366 */
6367 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6368 {
6369   PetscCount k, p;
6370 
6371   PetscFunctionBegin;
6372   jmap_new[0] = 0;
6373   p           = nnz;                /* p loops over jmap_new[] backwards */
6374   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6375     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6376   }
6377   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6378   PetscFunctionReturn(PETSC_SUCCESS);
6379 }
6380 
6381 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data)
6382 {
6383   MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data;
6384 
6385   PetscFunctionBegin;
6386   PetscCall(PetscSFDestroy(&coo->sf));
6387   PetscCall(PetscFree(coo->Aperm1));
6388   PetscCall(PetscFree(coo->Bperm1));
6389   PetscCall(PetscFree(coo->Ajmap1));
6390   PetscCall(PetscFree(coo->Bjmap1));
6391   PetscCall(PetscFree(coo->Aimap2));
6392   PetscCall(PetscFree(coo->Bimap2));
6393   PetscCall(PetscFree(coo->Aperm2));
6394   PetscCall(PetscFree(coo->Bperm2));
6395   PetscCall(PetscFree(coo->Ajmap2));
6396   PetscCall(PetscFree(coo->Bjmap2));
6397   PetscCall(PetscFree(coo->Cperm1));
6398   PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
6399   PetscCall(PetscFree(coo));
6400   PetscFunctionReturn(PETSC_SUCCESS);
6401 }
6402 
6403 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6404 {
6405   MPI_Comm             comm;
6406   PetscMPIInt          rank, size;
6407   PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6408   PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
6409   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6410   PetscContainer       container;
6411   MatCOOStruct_MPIAIJ *coo;
6412 
6413   PetscFunctionBegin;
6414   PetscCall(PetscFree(mpiaij->garray));
6415   PetscCall(VecDestroy(&mpiaij->lvec));
6416 #if defined(PETSC_USE_CTABLE)
6417   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6418 #else
6419   PetscCall(PetscFree(mpiaij->colmap));
6420 #endif
6421   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6422   mat->assembled     = PETSC_FALSE;
6423   mat->was_assembled = PETSC_FALSE;
6424 
6425   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6426   PetscCallMPI(MPI_Comm_size(comm, &size));
6427   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6428   PetscCall(PetscLayoutSetUp(mat->rmap));
6429   PetscCall(PetscLayoutSetUp(mat->cmap));
6430   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6431   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6432   PetscCall(MatGetLocalSize(mat, &m, &n));
6433   PetscCall(MatGetSize(mat, &M, &N));
6434 
6435   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6436   /* entries come first, then local rows, then remote rows.                     */
6437   PetscCount n1 = coo_n, *perm1;
6438   PetscInt  *i1 = coo_i, *j1 = coo_j;
6439 
6440   PetscCall(PetscMalloc1(n1, &perm1));
6441   for (k = 0; k < n1; k++) perm1[k] = k;
6442 
6443   /* Manipulate indices so that entries with negative row or col indices will have smallest
6444      row indices, local entries will have greater but negative row indices, and remote entries
6445      will have positive row indices.
6446   */
6447   for (k = 0; k < n1; k++) {
6448     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT;                /* e.g., -2^31, minimal to move them ahead */
6449     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6450     else {
6451       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6452       if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6453     }
6454   }
6455 
6456   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6457   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6458 
6459   /* Advance k to the first entry we need to take care of */
6460   for (k = 0; k < n1; k++)
6461     if (i1[k] > PETSC_MIN_INT) break;
6462   PetscInt i1start = k;
6463 
6464   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */
6465   for (; k < rem; k++) i1[k] += PETSC_MAX_INT;                                    /* Revert row indices of local rows*/
6466 
6467   /*           Send remote rows to their owner                                  */
6468   /* Find which rows should be sent to which remote ranks*/
6469   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6470   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6471   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6472   const PetscInt *ranges;
6473   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6474 
6475   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6476   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6477   for (k = rem; k < n1;) {
6478     PetscMPIInt owner;
6479     PetscInt    firstRow, lastRow;
6480 
6481     /* Locate a row range */
6482     firstRow = i1[k]; /* first row of this owner */
6483     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6484     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6485 
6486     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6487     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6488 
6489     /* All entries in [k,p) belong to this remote owner */
6490     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6491       PetscMPIInt *sendto2;
6492       PetscInt    *nentries2;
6493       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6494 
6495       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6496       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6497       PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1));
6498       PetscCall(PetscFree2(sendto, nentries2));
6499       sendto   = sendto2;
6500       nentries = nentries2;
6501       maxNsend = maxNsend2;
6502     }
6503     sendto[nsend]   = owner;
6504     nentries[nsend] = p - k;
6505     PetscCall(PetscCountCast(p - k, &nentries[nsend]));
6506     nsend++;
6507     k = p;
6508   }
6509 
6510   /* Build 1st SF to know offsets on remote to send data */
6511   PetscSF      sf1;
6512   PetscInt     nroots = 1, nroots2 = 0;
6513   PetscInt     nleaves = nsend, nleaves2 = 0;
6514   PetscInt    *offsets;
6515   PetscSFNode *iremote;
6516 
6517   PetscCall(PetscSFCreate(comm, &sf1));
6518   PetscCall(PetscMalloc1(nsend, &iremote));
6519   PetscCall(PetscMalloc1(nsend, &offsets));
6520   for (k = 0; k < nsend; k++) {
6521     iremote[k].rank  = sendto[k];
6522     iremote[k].index = 0;
6523     nleaves2 += nentries[k];
6524     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6525   }
6526   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6527   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6528   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6529   PetscCall(PetscSFDestroy(&sf1));
6530   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem);
6531 
6532   /* Build 2nd SF to send remote COOs to their owner */
6533   PetscSF sf2;
6534   nroots  = nroots2;
6535   nleaves = nleaves2;
6536   PetscCall(PetscSFCreate(comm, &sf2));
6537   PetscCall(PetscSFSetFromOptions(sf2));
6538   PetscCall(PetscMalloc1(nleaves, &iremote));
6539   p = 0;
6540   for (k = 0; k < nsend; k++) {
6541     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6542     for (q = 0; q < nentries[k]; q++, p++) {
6543       iremote[p].rank  = sendto[k];
6544       iremote[p].index = offsets[k] + q;
6545     }
6546   }
6547   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6548 
6549   /* Send the remote COOs to their owner */
6550   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6551   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6552   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6553   PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6554   PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6555   PetscInt *i1prem = i1 ? i1 + rem : NULL; /* silence ubsan warnings about pointer arithmetic on null pointer */
6556   PetscInt *j1prem = j1 ? j1 + rem : NULL;
6557   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6558   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE));
6559   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6560   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE));
6561 
6562   PetscCall(PetscFree(offsets));
6563   PetscCall(PetscFree2(sendto, nentries));
6564 
6565   /* Sort received COOs by row along with the permutation array     */
6566   for (k = 0; k < n2; k++) perm2[k] = k;
6567   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6568 
6569   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6570   PetscCount *Cperm1;
6571   PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6572   PetscCount *perm1prem = perm1 ? perm1 + rem : NULL;
6573   PetscCall(PetscMalloc1(nleaves, &Cperm1));
6574   PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves));
6575 
6576   /* Support for HYPRE matrices, kind of a hack.
6577      Swap min column with diagonal so that diagonal values will go first */
6578   PetscBool   hypre;
6579   const char *name;
6580   PetscCall(PetscObjectGetName((PetscObject)mat, &name));
6581   PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", name, &hypre));
6582   if (hypre) {
6583     PetscInt *minj;
6584     PetscBT   hasdiag;
6585 
6586     PetscCall(PetscBTCreate(m, &hasdiag));
6587     PetscCall(PetscMalloc1(m, &minj));
6588     for (k = 0; k < m; k++) minj[k] = PETSC_MAX_INT;
6589     for (k = i1start; k < rem; k++) {
6590       if (j1[k] < cstart || j1[k] >= cend) continue;
6591       const PetscInt rindex = i1[k] - rstart;
6592       if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6593       minj[rindex] = PetscMin(minj[rindex], j1[k]);
6594     }
6595     for (k = 0; k < n2; k++) {
6596       if (j2[k] < cstart || j2[k] >= cend) continue;
6597       const PetscInt rindex = i2[k] - rstart;
6598       if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6599       minj[rindex] = PetscMin(minj[rindex], j2[k]);
6600     }
6601     for (k = i1start; k < rem; k++) {
6602       const PetscInt rindex = i1[k] - rstart;
6603       if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6604       if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart);
6605       else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex];
6606     }
6607     for (k = 0; k < n2; k++) {
6608       const PetscInt rindex = i2[k] - rstart;
6609       if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6610       if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart);
6611       else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex];
6612     }
6613     PetscCall(PetscBTDestroy(&hasdiag));
6614     PetscCall(PetscFree(minj));
6615   }
6616 
6617   /* Split local COOs and received COOs into diag/offdiag portions */
6618   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6619   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1;
6620   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6621   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6622   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6623   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6624 
6625   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6626   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6627   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6628   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6629 
6630   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6631   PetscInt *Ai, *Bi;
6632   PetscInt *Aj, *Bj;
6633 
6634   PetscCall(PetscMalloc1(m + 1, &Ai));
6635   PetscCall(PetscMalloc1(m + 1, &Bi));
6636   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6637   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6638 
6639   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6640   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6641   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6642   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6643   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6644 
6645   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6646   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6647 
6648   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6649   /* expect nonzeros in A/B most likely have local contributing entries        */
6650   PetscInt    Annz = Ai[m];
6651   PetscInt    Bnnz = Bi[m];
6652   PetscCount *Ajmap1_new, *Bjmap1_new;
6653 
6654   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6655   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6656 
6657   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6658   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6659 
6660   PetscCall(PetscFree(Aimap1));
6661   PetscCall(PetscFree(Ajmap1));
6662   PetscCall(PetscFree(Bimap1));
6663   PetscCall(PetscFree(Bjmap1));
6664   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6665   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6666   PetscCall(PetscFree(perm1));
6667   PetscCall(PetscFree3(i2, j2, perm2));
6668 
6669   Ajmap1 = Ajmap1_new;
6670   Bjmap1 = Bjmap1_new;
6671 
6672   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6673   if (Annz < Annz1 + Annz2) {
6674     PetscInt *Aj_new;
6675     PetscCall(PetscMalloc1(Annz, &Aj_new));
6676     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6677     PetscCall(PetscFree(Aj));
6678     Aj = Aj_new;
6679   }
6680 
6681   if (Bnnz < Bnnz1 + Bnnz2) {
6682     PetscInt *Bj_new;
6683     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6684     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6685     PetscCall(PetscFree(Bj));
6686     Bj = Bj_new;
6687   }
6688 
6689   /* Create new submatrices for on-process and off-process coupling                  */
6690   PetscScalar     *Aa, *Ba;
6691   MatType          rtype;
6692   Mat_SeqAIJ      *a, *b;
6693   PetscObjectState state;
6694   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6695   PetscCall(PetscCalloc1(Bnnz, &Ba));
6696   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6697   if (cstart) {
6698     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6699   }
6700 
6701   PetscCall(MatGetRootType_Private(mat, &rtype));
6702 
6703   MatSeqXAIJGetOptions_Private(mpiaij->A);
6704   PetscCall(MatDestroy(&mpiaij->A));
6705   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6706   PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat));
6707   MatSeqXAIJRestoreOptions_Private(mpiaij->A);
6708 
6709   MatSeqXAIJGetOptions_Private(mpiaij->B);
6710   PetscCall(MatDestroy(&mpiaij->B));
6711   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6712   PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat));
6713   MatSeqXAIJRestoreOptions_Private(mpiaij->B);
6714 
6715   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6716   mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ
6717   state              = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate;
6718   PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
6719 
6720   a               = (Mat_SeqAIJ *)mpiaij->A->data;
6721   b               = (Mat_SeqAIJ *)mpiaij->B->data;
6722   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6723   a->free_a = b->free_a = PETSC_TRUE;
6724   a->free_ij = b->free_ij = PETSC_TRUE;
6725 
6726   /* conversion must happen AFTER multiply setup */
6727   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6728   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6729   PetscCall(VecDestroy(&mpiaij->lvec));
6730   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6731 
6732   // Put the COO struct in a container and then attach that to the matrix
6733   PetscCall(PetscMalloc1(1, &coo));
6734   coo->n       = coo_n;
6735   coo->sf      = sf2;
6736   coo->sendlen = nleaves;
6737   coo->recvlen = nroots;
6738   coo->Annz    = Annz;
6739   coo->Bnnz    = Bnnz;
6740   coo->Annz2   = Annz2;
6741   coo->Bnnz2   = Bnnz2;
6742   coo->Atot1   = Atot1;
6743   coo->Atot2   = Atot2;
6744   coo->Btot1   = Btot1;
6745   coo->Btot2   = Btot2;
6746   coo->Ajmap1  = Ajmap1;
6747   coo->Aperm1  = Aperm1;
6748   coo->Bjmap1  = Bjmap1;
6749   coo->Bperm1  = Bperm1;
6750   coo->Aimap2  = Aimap2;
6751   coo->Ajmap2  = Ajmap2;
6752   coo->Aperm2  = Aperm2;
6753   coo->Bimap2  = Bimap2;
6754   coo->Bjmap2  = Bjmap2;
6755   coo->Bperm2  = Bperm2;
6756   coo->Cperm1  = Cperm1;
6757   // Allocate in preallocation. If not used, it has zero cost on host
6758   PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
6759   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
6760   PetscCall(PetscContainerSetPointer(container, coo));
6761   PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ));
6762   PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
6763   PetscCall(PetscContainerDestroy(&container));
6764   PetscFunctionReturn(PETSC_SUCCESS);
6765 }
6766 
6767 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6768 {
6769   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6770   Mat                  A = mpiaij->A, B = mpiaij->B;
6771   PetscScalar         *Aa, *Ba;
6772   PetscScalar         *sendbuf, *recvbuf;
6773   const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
6774   const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
6775   const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
6776   const PetscCount    *Cperm1;
6777   PetscContainer       container;
6778   MatCOOStruct_MPIAIJ *coo;
6779 
6780   PetscFunctionBegin;
6781   PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
6782   PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix");
6783   PetscCall(PetscContainerGetPointer(container, (void **)&coo));
6784   sendbuf = coo->sendbuf;
6785   recvbuf = coo->recvbuf;
6786   Ajmap1  = coo->Ajmap1;
6787   Ajmap2  = coo->Ajmap2;
6788   Aimap2  = coo->Aimap2;
6789   Bjmap1  = coo->Bjmap1;
6790   Bjmap2  = coo->Bjmap2;
6791   Bimap2  = coo->Bimap2;
6792   Aperm1  = coo->Aperm1;
6793   Aperm2  = coo->Aperm2;
6794   Bperm1  = coo->Bperm1;
6795   Bperm2  = coo->Bperm2;
6796   Cperm1  = coo->Cperm1;
6797 
6798   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6799   PetscCall(MatSeqAIJGetArray(B, &Ba));
6800 
6801   /* Pack entries to be sent to remote */
6802   for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6803 
6804   /* Send remote entries to their owner and overlap the communication with local computation */
6805   PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6806   /* Add local entries to A and B */
6807   for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6808     PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
6809     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6810     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6811   }
6812   for (PetscCount i = 0; i < coo->Bnnz; i++) {
6813     PetscScalar sum = 0.0;
6814     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6815     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6816   }
6817   PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6818 
6819   /* Add received remote entries to A and B */
6820   for (PetscCount i = 0; i < coo->Annz2; i++) {
6821     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6822   }
6823   for (PetscCount i = 0; i < coo->Bnnz2; i++) {
6824     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6825   }
6826   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6827   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6828   PetscFunctionReturn(PETSC_SUCCESS);
6829 }
6830 
6831 /*MC
6832    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6833 
6834    Options Database Keys:
6835 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6836 
6837    Level: beginner
6838 
6839    Notes:
6840    `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values,
6841     in this case the values associated with the rows and columns one passes in are set to zero
6842     in the matrix
6843 
    `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
6846 
6847 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6848 M*/
6849 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6850 {
6851   Mat_MPIAIJ *b;
6852   PetscMPIInt size;
6853 
6854   PetscFunctionBegin;
6855   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
6856 
6857   PetscCall(PetscNew(&b));
6858   B->data       = (void *)b;
6859   B->ops[0]     = MatOps_Values;
6860   B->assembled  = PETSC_FALSE;
6861   B->insertmode = NOT_SET_VALUES;
6862   b->size       = size;
6863 
6864   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
6865 
6866   /* build cache for off array entries formed */
6867   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
6868 
6869   b->donotstash  = PETSC_FALSE;
6870   b->colmap      = NULL;
6871   b->garray      = NULL;
6872   b->roworiented = PETSC_TRUE;
6873 
6874   /* stuff used for matrix vector multiply */
6875   b->lvec  = NULL;
6876   b->Mvctx = NULL;
6877 
6878   /* stuff for MatGetRow() */
6879   b->rowindices   = NULL;
6880   b->rowvalues    = NULL;
6881   b->getrowactive = PETSC_FALSE;
6882 
6883   /* flexible pointer used in CUSPARSE classes */
6884   b->spptr = NULL;
6885 
6886   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
6887   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
6888   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
6889   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
6890   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
6891   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
6892   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
6893   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
6894   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
6895   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
6896 #if defined(PETSC_HAVE_CUDA)
6897   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
6898 #endif
6899 #if defined(PETSC_HAVE_HIP)
6900   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
6901 #endif
6902 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6903   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
6904 #endif
6905 #if defined(PETSC_HAVE_MKL_SPARSE)
6906   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
6907 #endif
6908   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
6909   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
6910   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
6911   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
6912 #if defined(PETSC_HAVE_ELEMENTAL)
6913   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
6914 #endif
6915 #if defined(PETSC_HAVE_SCALAPACK)
6916   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
6917 #endif
6918   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
6919   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
6920 #if defined(PETSC_HAVE_HYPRE)
6921   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
6922   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
6923 #endif
6924   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
6925   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
6926   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
6927   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
6928   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
6929   PetscFunctionReturn(PETSC_SUCCESS);
6930 }
6931 
6932 /*@C
6933   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6934   and "off-diagonal" part of the matrix in CSR format.
6935 
6936   Collective
6937 
6938   Input Parameters:
6939 + comm - MPI communicator
6940 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
6941 . n    - This value should be the same as the local size used in creating the
6942          x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have
6943          calculated if `N` is given) For square matrices `n` is almost always `m`.
6944 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
6945 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
6946 . i    - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6947 . j    - column indices, which must be local, i.e., based off the start column of the diagonal portion
6948 . a    - matrix values
6949 . oi   - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6950 . oj   - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
6951 - oa   - matrix values
6952 
6953   Output Parameter:
6954 . mat - the matrix
6955 
6956   Level: advanced
6957 
6958   Notes:
6959   The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user
6960   must free the arrays once the matrix has been destroyed and not before.
6961 
6962   The `i` and `j` indices are 0 based
6963 
6964   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix
6965 
6966   This sets local rows and cannot be used to set off-processor values.
6967 
6968   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6969   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6970   not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6971   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6972   keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6973   communication if it is known that only local entries will be set.
6974 
6975 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6976           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6977 @*/
6978 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
6979 {
6980   Mat_MPIAIJ *maij;
6981 
6982   PetscFunctionBegin;
6983   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
6984   PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
6985   PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
6986   PetscCall(MatCreate(comm, mat));
6987   PetscCall(MatSetSizes(*mat, m, n, M, N));
6988   PetscCall(MatSetType(*mat, MATMPIAIJ));
6989   maij = (Mat_MPIAIJ *)(*mat)->data;
6990 
6991   (*mat)->preallocated = PETSC_TRUE;
6992 
6993   PetscCall(PetscLayoutSetUp((*mat)->rmap));
6994   PetscCall(PetscLayoutSetUp((*mat)->cmap));
6995 
6996   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
6997   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));
6998 
6999   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
7000   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
7001   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
7002   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
7003   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
7004   PetscFunctionReturn(PETSC_SUCCESS);
7005 }
7006 
7007 typedef struct {
7008   Mat       *mp;    /* intermediate products */
7009   PetscBool *mptmp; /* is the intermediate product temporary ? */
7010   PetscInt   cp;    /* number of intermediate products */
7011 
7012   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
7013   PetscInt    *startsj_s, *startsj_r;
7014   PetscScalar *bufa;
7015   Mat          P_oth;
7016 
7017   /* may take advantage of merging product->B */
7018   Mat Bloc; /* B-local by merging diag and off-diag */
7019 
7020   /* cusparse does not have support to split between symbolic and numeric phases.
7021      When api_user is true, we don't need to update the numerical values
7022      of the temporary storage */
7023   PetscBool reusesym;
7024 
7025   /* support for COO values insertion */
7026   PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
7027   PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
7028   PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
7029   PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
7030   PetscSF      sf;            /* used for non-local values insertion and memory malloc */
7031   PetscMemType mtype;
7032 
7033   /* customization */
7034   PetscBool abmerge;
7035   PetscBool P_oth_bind;
7036 } MatMatMPIAIJBACKEND;
7037 
7038 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
7039 {
7040   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
7041   PetscInt             i;
7042 
7043   PetscFunctionBegin;
7044   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
7045   PetscCall(PetscFree(mmdata->bufa));
7046   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
7047   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
7048   PetscCall(MatDestroy(&mmdata->P_oth));
7049   PetscCall(MatDestroy(&mmdata->Bloc));
7050   PetscCall(PetscSFDestroy(&mmdata->sf));
7051   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
7052   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
7053   PetscCall(PetscFree(mmdata->own[0]));
7054   PetscCall(PetscFree(mmdata->own));
7055   PetscCall(PetscFree(mmdata->off[0]));
7056   PetscCall(PetscFree(mmdata->off));
7057   PetscCall(PetscFree(mmdata));
7058   PetscFunctionReturn(PETSC_SUCCESS);
7059 }
7060 
7061 /* Copy selected n entries with indices in idx[] of A to v[].
7062    If idx is NULL, copy the whole data array of A to v[]
7063  */
7064 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
7065 {
7066   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
7067 
7068   PetscFunctionBegin;
7069   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7070   if (f) {
7071     PetscCall((*f)(A, n, idx, v));
7072   } else {
7073     const PetscScalar *vv;
7074 
7075     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7076     if (n && idx) {
7077       PetscScalar    *w  = v;
7078       const PetscInt *oi = idx;
7079       PetscInt        j;
7080 
7081       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7082     } else {
7083       PetscCall(PetscArraycpy(v, vv, n));
7084     }
7085     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7086   }
7087   PetscFunctionReturn(PETSC_SUCCESS);
7088 }
7089 
7090 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
7091 {
7092   MatMatMPIAIJBACKEND *mmdata;
7093   PetscInt             i, n_d, n_o;
7094 
7095   PetscFunctionBegin;
7096   MatCheckProduct(C, 1);
7097   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
7098   mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
7099   if (!mmdata->reusesym) { /* update temporary matrices */
7100     if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7101     if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
7102   }
7103   mmdata->reusesym = PETSC_FALSE;
7104 
7105   for (i = 0; i < mmdata->cp; i++) {
7106     PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
7107     PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
7108   }
7109   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
7110     PetscInt noff = mmdata->off[i + 1] - mmdata->off[i];
7111 
7112     if (mmdata->mptmp[i]) continue;
7113     if (noff) {
7114       PetscInt nown = mmdata->own[i + 1] - mmdata->own[i];
7115 
7116       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
7117       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
7118       n_o += noff;
7119       n_d += nown;
7120     } else {
7121       Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;
7122 
7123       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
7124       n_d += mm->nz;
7125     }
7126   }
7127   if (mmdata->hasoffproc) { /* offprocess insertion */
7128     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7129     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7130   }
7131   PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
7132   PetscFunctionReturn(PETSC_SUCCESS);
7133 }
7134 
7135 /* Support for Pt * A, A * P, or Pt * A * P */
7136 #define MAX_NUMBER_INTERMEDIATE 4
7137 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
7138 {
7139   Mat_Product           *product = C->product;
7140   Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
7141   Mat_MPIAIJ            *a, *p;
7142   MatMatMPIAIJBACKEND   *mmdata;
7143   ISLocalToGlobalMapping P_oth_l2g = NULL;
7144   IS                     glob      = NULL;
7145   const char            *prefix;
7146   char                   pprefix[256];
7147   const PetscInt        *globidx, *P_oth_idx;
7148   PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
7149   PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
7150   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
7151                                                                                          /* type-0: consecutive, start from 0; type-1: consecutive with */
7152                                                                                          /* a base offset; type-2: sparse with a local to global map table */
7153   const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */
7154 
7155   MatProductType ptype;
7156   PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
7157   PetscMPIInt    size;
7158 
7159   PetscFunctionBegin;
7160   MatCheckProduct(C, 1);
7161   PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
7162   ptype = product->type;
7163   if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
7164     ptype                                          = MATPRODUCT_AB;
7165     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
7166   }
7167   switch (ptype) {
7168   case MATPRODUCT_AB:
7169     A          = product->A;
7170     P          = product->B;
7171     m          = A->rmap->n;
7172     n          = P->cmap->n;
7173     M          = A->rmap->N;
7174     N          = P->cmap->N;
7175     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
7176     break;
7177   case MATPRODUCT_AtB:
7178     P          = product->A;
7179     A          = product->B;
7180     m          = P->cmap->n;
7181     n          = A->cmap->n;
7182     M          = P->cmap->N;
7183     N          = A->cmap->N;
7184     hasoffproc = PETSC_TRUE;
7185     break;
7186   case MATPRODUCT_PtAP:
7187     A          = product->A;
7188     P          = product->B;
7189     m          = P->cmap->n;
7190     n          = P->cmap->n;
7191     M          = P->cmap->N;
7192     N          = P->cmap->N;
7193     hasoffproc = PETSC_TRUE;
7194     break;
7195   default:
7196     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7197   }
7198   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
7199   if (size == 1) hasoffproc = PETSC_FALSE;
7200 
7201   /* defaults */
7202   for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
7203     mp[i]    = NULL;
7204     mptmp[i] = PETSC_FALSE;
7205     rmapt[i] = -1;
7206     cmapt[i] = -1;
7207     rmapa[i] = NULL;
7208     cmapa[i] = NULL;
7209   }
7210 
7211   /* customization */
7212   PetscCall(PetscNew(&mmdata));
7213   mmdata->reusesym = product->api_user;
7214   if (ptype == MATPRODUCT_AB) {
7215     if (product->api_user) {
7216       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
7217       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7218       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7219       PetscOptionsEnd();
7220     } else {
7221       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
7222       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7223       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7224       PetscOptionsEnd();
7225     }
7226   } else if (ptype == MATPRODUCT_PtAP) {
7227     if (product->api_user) {
7228       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
7229       PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7230       PetscOptionsEnd();
7231     } else {
7232       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
7233       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7234       PetscOptionsEnd();
7235     }
7236   }
7237   a = (Mat_MPIAIJ *)A->data;
7238   p = (Mat_MPIAIJ *)P->data;
7239   PetscCall(MatSetSizes(C, m, n, M, N));
7240   PetscCall(PetscLayoutSetUp(C->rmap));
7241   PetscCall(PetscLayoutSetUp(C->cmap));
7242   PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
7243   PetscCall(MatGetOptionsPrefix(C, &prefix));
7244 
7245   cp = 0;
7246   switch (ptype) {
7247   case MATPRODUCT_AB: /* A * P */
7248     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7249 
7250     /* A_diag * P_local (merged or not) */
7251     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7252       /* P is product->B */
7253       PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7254       PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7255       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7256       PetscCall(MatProductSetFill(mp[cp], product->fill));
7257       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7258       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7259       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7260       mp[cp]->product->api_user = product->api_user;
7261       PetscCall(MatProductSetFromOptions(mp[cp]));
7262       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7263       PetscCall(ISGetIndices(glob, &globidx));
7264       rmapt[cp] = 1;
7265       cmapt[cp] = 2;
7266       cmapa[cp] = globidx;
7267       mptmp[cp] = PETSC_FALSE;
7268       cp++;
7269     } else { /* A_diag * P_diag and A_diag * P_off */
7270       PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
7271       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7272       PetscCall(MatProductSetFill(mp[cp], product->fill));
7273       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7274       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7275       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7276       mp[cp]->product->api_user = product->api_user;
7277       PetscCall(MatProductSetFromOptions(mp[cp]));
7278       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7279       rmapt[cp] = 1;
7280       cmapt[cp] = 1;
7281       mptmp[cp] = PETSC_FALSE;
7282       cp++;
7283       PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
7284       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7285       PetscCall(MatProductSetFill(mp[cp], product->fill));
7286       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7287       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7288       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7289       mp[cp]->product->api_user = product->api_user;
7290       PetscCall(MatProductSetFromOptions(mp[cp]));
7291       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7292       rmapt[cp] = 1;
7293       cmapt[cp] = 2;
7294       cmapa[cp] = p->garray;
7295       mptmp[cp] = PETSC_FALSE;
7296       cp++;
7297     }
7298 
7299     /* A_off * P_other */
7300     if (mmdata->P_oth) {
7301       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
7302       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7303       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7304       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7305       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7306       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7307       PetscCall(MatProductSetFill(mp[cp], product->fill));
7308       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7309       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7310       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7311       mp[cp]->product->api_user = product->api_user;
7312       PetscCall(MatProductSetFromOptions(mp[cp]));
7313       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7314       rmapt[cp] = 1;
7315       cmapt[cp] = 2;
7316       cmapa[cp] = P_oth_idx;
7317       mptmp[cp] = PETSC_FALSE;
7318       cp++;
7319     }
7320     break;
7321 
7322   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7323     /* A is product->B */
7324     PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7325     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7326       PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
7327       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7328       PetscCall(MatProductSetFill(mp[cp], product->fill));
7329       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7330       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7331       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7332       mp[cp]->product->api_user = product->api_user;
7333       PetscCall(MatProductSetFromOptions(mp[cp]));
7334       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7335       PetscCall(ISGetIndices(glob, &globidx));
7336       rmapt[cp] = 2;
7337       rmapa[cp] = globidx;
7338       cmapt[cp] = 2;
7339       cmapa[cp] = globidx;
7340       mptmp[cp] = PETSC_FALSE;
7341       cp++;
7342     } else {
7343       PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
7344       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7345       PetscCall(MatProductSetFill(mp[cp], product->fill));
7346       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7347       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7348       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7349       mp[cp]->product->api_user = product->api_user;
7350       PetscCall(MatProductSetFromOptions(mp[cp]));
7351       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7352       PetscCall(ISGetIndices(glob, &globidx));
7353       rmapt[cp] = 1;
7354       cmapt[cp] = 2;
7355       cmapa[cp] = globidx;
7356       mptmp[cp] = PETSC_FALSE;
7357       cp++;
7358       PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
7359       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7360       PetscCall(MatProductSetFill(mp[cp], product->fill));
7361       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7362       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7363       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7364       mp[cp]->product->api_user = product->api_user;
7365       PetscCall(MatProductSetFromOptions(mp[cp]));
7366       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7367       rmapt[cp] = 2;
7368       rmapa[cp] = p->garray;
7369       cmapt[cp] = 2;
7370       cmapa[cp] = globidx;
7371       mptmp[cp] = PETSC_FALSE;
7372       cp++;
7373     }
7374     break;
7375   case MATPRODUCT_PtAP:
7376     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7377     /* P is product->B */
7378     PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7379     PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7380     PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
7381     PetscCall(MatProductSetFill(mp[cp], product->fill));
7382     PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7383     PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7384     PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7385     mp[cp]->product->api_user = product->api_user;
7386     PetscCall(MatProductSetFromOptions(mp[cp]));
7387     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7388     PetscCall(ISGetIndices(glob, &globidx));
7389     rmapt[cp] = 2;
7390     rmapa[cp] = globidx;
7391     cmapt[cp] = 2;
7392     cmapa[cp] = globidx;
7393     mptmp[cp] = PETSC_FALSE;
7394     cp++;
7395     if (mmdata->P_oth) {
7396       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
7397       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7398       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7399       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7400       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7401       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7402       PetscCall(MatProductSetFill(mp[cp], product->fill));
7403       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7404       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7405       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7406       mp[cp]->product->api_user = product->api_user;
7407       PetscCall(MatProductSetFromOptions(mp[cp]));
7408       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7409       mptmp[cp] = PETSC_TRUE;
7410       cp++;
7411       PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
7412       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7413       PetscCall(MatProductSetFill(mp[cp], product->fill));
7414       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7415       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7416       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7417       mp[cp]->product->api_user = product->api_user;
7418       PetscCall(MatProductSetFromOptions(mp[cp]));
7419       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7420       rmapt[cp] = 2;
7421       rmapa[cp] = globidx;
7422       cmapt[cp] = 2;
7423       cmapa[cp] = P_oth_idx;
7424       mptmp[cp] = PETSC_FALSE;
7425       cp++;
7426     }
7427     break;
7428   default:
7429     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7430   }
7431   /* sanity check */
7432   if (size > 1)
7433     for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);
7434 
7435   PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
7436   for (i = 0; i < cp; i++) {
7437     mmdata->mp[i]    = mp[i];
7438     mmdata->mptmp[i] = mptmp[i];
7439   }
7440   mmdata->cp             = cp;
7441   C->product->data       = mmdata;
7442   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7443   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7444 
7445   /* memory type */
7446   mmdata->mtype = PETSC_MEMTYPE_HOST;
7447   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
7448   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
7449   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
7450   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7451   else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
7452   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7453 
7454   /* prepare coo coordinates for values insertion */
7455 
7456   /* count total nonzeros of those intermediate seqaij Mats
7457     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7458     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7459     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7460   */
7461   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7462     Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
7463     if (mptmp[cp]) continue;
7464     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
7465       const PetscInt *rmap = rmapa[cp];
7466       const PetscInt  mr   = mp[cp]->rmap->n;
7467       const PetscInt  rs   = C->rmap->rstart;
7468       const PetscInt  re   = C->rmap->rend;
7469       const PetscInt *ii   = mm->i;
7470       for (i = 0; i < mr; i++) {
7471         const PetscInt gr = rmap[i];
7472         const PetscInt nz = ii[i + 1] - ii[i];
7473         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7474         else ncoo_oown += nz;                  /* this row is local */
7475       }
7476     } else ncoo_d += mm->nz;
7477   }
7478 
7479   /*
7480     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7481 
7482     ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.
7483 
7484     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].
7485 
7486     off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
7487     own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
7488     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7489 
7490     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7491     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
7492   */
7493   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
7494   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));
7495 
7496   /* gather (i,j) of nonzeros inserted by remote procs */
7497   if (hasoffproc) {
7498     PetscSF  msf;
7499     PetscInt ncoo2, *coo_i2, *coo_j2;
7500 
7501     PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
7502     PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
7503     PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */
7504 
7505     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7506       Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
7507       PetscInt   *idxoff = mmdata->off[cp];
7508       PetscInt   *idxown = mmdata->own[cp];
7509       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7510         const PetscInt *rmap = rmapa[cp];
7511         const PetscInt *cmap = cmapa[cp];
7512         const PetscInt *ii   = mm->i;
7513         PetscInt       *coi  = coo_i + ncoo_o;
7514         PetscInt       *coj  = coo_j + ncoo_o;
7515         const PetscInt  mr   = mp[cp]->rmap->n;
7516         const PetscInt  rs   = C->rmap->rstart;
7517         const PetscInt  re   = C->rmap->rend;
7518         const PetscInt  cs   = C->cmap->rstart;
7519         for (i = 0; i < mr; i++) {
7520           const PetscInt *jj = mm->j + ii[i];
7521           const PetscInt  gr = rmap[i];
7522           const PetscInt  nz = ii[i + 1] - ii[i];
7523           if (gr < rs || gr >= re) { /* this is an offproc row */
7524             for (j = ii[i]; j < ii[i + 1]; j++) {
7525               *coi++    = gr;
7526               *idxoff++ = j;
7527             }
7528             if (!cmapt[cp]) { /* already global */
7529               for (j = 0; j < nz; j++) *coj++ = jj[j];
7530             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7531               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7532             } else { /* offdiag */
7533               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7534             }
7535             ncoo_o += nz;
7536           } else { /* this is a local row */
7537             for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
7538           }
7539         }
7540       }
7541       mmdata->off[cp + 1] = idxoff;
7542       mmdata->own[cp + 1] = idxown;
7543     }
7544 
7545     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7546     PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
7547     PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
7548     PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
7549     ncoo = ncoo_d + ncoo_oown + ncoo2;
7550     PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
7551     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7552     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
7553     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7554     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7555     PetscCall(PetscFree2(coo_i, coo_j));
7556     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7557     PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
7558     coo_i = coo_i2;
7559     coo_j = coo_j2;
7560   } else { /* no offproc values insertion */
7561     ncoo = ncoo_d;
7562     PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));
7563 
7564     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7565     PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
7566     PetscCall(PetscSFSetUp(mmdata->sf));
7567   }
7568   mmdata->hasoffproc = hasoffproc;
7569 
7570   /* gather (i,j) of nonzeros inserted locally */
7571   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7572     Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
7573     PetscInt       *coi  = coo_i + ncoo_d;
7574     PetscInt       *coj  = coo_j + ncoo_d;
7575     const PetscInt *jj   = mm->j;
7576     const PetscInt *ii   = mm->i;
7577     const PetscInt *cmap = cmapa[cp];
7578     const PetscInt *rmap = rmapa[cp];
7579     const PetscInt  mr   = mp[cp]->rmap->n;
7580     const PetscInt  rs   = C->rmap->rstart;
7581     const PetscInt  re   = C->rmap->rend;
7582     const PetscInt  cs   = C->cmap->rstart;
7583 
7584     if (mptmp[cp]) continue;
7585     if (rmapt[cp] == 1) { /* consecutive rows */
7586       /* fill coo_i */
7587       for (i = 0; i < mr; i++) {
7588         const PetscInt gr = i + rs;
7589         for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
7590       }
7591       /* fill coo_j */
7592       if (!cmapt[cp]) { /* type-0, already global */
7593         PetscCall(PetscArraycpy(coj, jj, mm->nz));
7594       } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
7595         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7596       } else {                                            /* type-2, local to global for sparse columns */
7597         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7598       }
7599       ncoo_d += mm->nz;
7600     } else if (rmapt[cp] == 2) { /* sparse rows */
7601       for (i = 0; i < mr; i++) {
7602         const PetscInt *jj = mm->j + ii[i];
7603         const PetscInt  gr = rmap[i];
7604         const PetscInt  nz = ii[i + 1] - ii[i];
7605         if (gr >= rs && gr < re) { /* local rows */
7606           for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
7607           if (!cmapt[cp]) { /* type-0, already global */
7608             for (j = 0; j < nz; j++) *coj++ = jj[j];
7609           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7610             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7611           } else { /* type-2, local to global for sparse columns */
7612             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7613           }
7614           ncoo_d += nz;
7615         }
7616       }
7617     }
7618   }
7619   if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
7620   PetscCall(ISDestroy(&glob));
7621   if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
7622   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7623   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7624   PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));
7625 
7626   /* preallocate with COO data */
7627   PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
7628   PetscCall(PetscFree2(coo_i, coo_j));
7629   PetscFunctionReturn(PETSC_SUCCESS);
7630 }
7631 
7632 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7633 {
7634   Mat_Product *product = mat->product;
7635 #if defined(PETSC_HAVE_DEVICE)
7636   PetscBool match  = PETSC_FALSE;
7637   PetscBool usecpu = PETSC_FALSE;
7638 #else
7639   PetscBool match = PETSC_TRUE;
7640 #endif
7641 
7642   PetscFunctionBegin;
7643   MatCheckProduct(mat, 1);
7644 #if defined(PETSC_HAVE_DEVICE)
7645   if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
7646   if (match) { /* we can always fallback to the CPU if requested */
7647     switch (product->type) {
7648     case MATPRODUCT_AB:
7649       if (product->api_user) {
7650         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
7651         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7652         PetscOptionsEnd();
7653       } else {
7654         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
7655         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7656         PetscOptionsEnd();
7657       }
7658       break;
7659     case MATPRODUCT_AtB:
7660       if (product->api_user) {
7661         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
7662         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7663         PetscOptionsEnd();
7664       } else {
7665         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
7666         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7667         PetscOptionsEnd();
7668       }
7669       break;
7670     case MATPRODUCT_PtAP:
7671       if (product->api_user) {
7672         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
7673         PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7674         PetscOptionsEnd();
7675       } else {
7676         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
7677         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7678         PetscOptionsEnd();
7679       }
7680       break;
7681     default:
7682       break;
7683     }
7684     match = (PetscBool)!usecpu;
7685   }
7686 #endif
7687   if (match) {
7688     switch (product->type) {
7689     case MATPRODUCT_AB:
7690     case MATPRODUCT_AtB:
7691     case MATPRODUCT_PtAP:
7692       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7693       break;
7694     default:
7695       break;
7696     }
7697   }
7698   /* fallback to MPIAIJ ops */
7699   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7700   PetscFunctionReturn(PETSC_SUCCESS);
7701 }
7702 
7703 /*
7704    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7705 
7706    n - the number of block indices in cc[]
7707    cc - the block indices (must be large enough to contain the indices)
7708 */
7709 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7710 {
7711   PetscInt        cnt = -1, nidx, j;
7712   const PetscInt *idx;
7713 
7714   PetscFunctionBegin;
7715   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7716   if (nidx) {
7717     cnt     = 0;
7718     cc[cnt] = idx[0] / bs;
7719     for (j = 1; j < nidx; j++) {
7720       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7721     }
7722   }
7723   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7724   *n = cnt + 1;
7725   PetscFunctionReturn(PETSC_SUCCESS);
7726 }
7727 
7728 /*
7729     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7730 
7731     ncollapsed - the number of block indices
7732     collapsed - the block indices (must be large enough to contain the indices)
7733 */
7734 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7735 {
7736   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7737 
7738   PetscFunctionBegin;
7739   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7740   for (i = start + 1; i < start + bs; i++) {
7741     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7742     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7743     cprevtmp = cprev;
7744     cprev    = merged;
7745     merged   = cprevtmp;
7746   }
7747   *ncollapsed = nprev;
7748   if (collapsed) *collapsed = cprev;
7749   PetscFunctionReturn(PETSC_SUCCESS);
7750 }
7751 
7752 /*
7753  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7754 
7755  Input Parameter:
7756  . Amat - matrix
7757  - symmetrize - make the result symmetric
7758  + scale - scale with diagonal
7759 
7760  Output Parameter:
7761  . a_Gmat - output scalar graph >= 0
7762 
7763 */
7764 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat)
7765 {
7766   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7767   MPI_Comm  comm;
7768   Mat       Gmat;
7769   PetscBool ismpiaij, isseqaij;
7770   Mat       a, b, c;
7771   MatType   jtype;
7772 
7773   PetscFunctionBegin;
7774   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7775   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7776   PetscCall(MatGetSize(Amat, &MM, &NN));
7777   PetscCall(MatGetBlockSize(Amat, &bs));
7778   nloc = (Iend - Istart) / bs;
7779 
7780   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7781   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7782   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7783 
7784   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7785   /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
7786      implementation */
7787   if (bs > 1) {
7788     PetscCall(MatGetType(Amat, &jtype));
7789     PetscCall(MatCreate(comm, &Gmat));
7790     PetscCall(MatSetType(Gmat, jtype));
7791     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7792     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7793     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7794       PetscInt  *d_nnz, *o_nnz;
7795       MatScalar *aa, val, *AA;
7796       PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
7797       if (isseqaij) {
7798         a = Amat;
7799         b = NULL;
7800       } else {
7801         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7802         a             = d->A;
7803         b             = d->B;
7804       }
7805       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7806       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7807       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7808         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
7809         const PetscInt *cols1, *cols2;
7810         for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
7811           PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
7812           nnz[brow / bs] = nc2 / bs;
7813           if (nc2 % bs) ok = 0;
7814           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
7815           for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
7816             PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
7817             if (nc1 != nc2) ok = 0;
7818             else {
7819               for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
7820                 if (cols1[jj] != cols2[jj]) ok = 0;
7821                 if (cols1[jj] % bs != jj % bs) ok = 0;
7822               }
7823             }
7824             PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
7825           }
7826           PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
7827           if (!ok) {
7828             PetscCall(PetscFree2(d_nnz, o_nnz));
7829             PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
7830             goto old_bs;
7831           }
7832         }
7833       }
7834       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7835       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7836       PetscCall(PetscFree2(d_nnz, o_nnz));
7837       PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
7838       // diag
7839       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
7840         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
7841         ai               = aseq->i;
7842         n                = ai[brow + 1] - ai[brow];
7843         aj               = aseq->j + ai[brow];
7844         for (int k = 0; k < n; k += bs) {        // block columns
7845           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
7846           val        = 0;
7847           if (index_size == 0) {
7848             for (int ii = 0; ii < bs; ii++) { // rows in block
7849               aa = aseq->a + ai[brow + ii] + k;
7850               for (int jj = 0; jj < bs; jj++) {         // columns in block
7851                 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7852               }
7853             }
7854           } else {                                       // use (index,index) value if provided
7855             for (int iii = 0; iii < index_size; iii++) { // rows in block
7856               int ii = index[iii];
7857               aa     = aseq->a + ai[brow + ii] + k;
7858               for (int jjj = 0; jjj < index_size; jjj++) { // columns in block
7859                 int jj = index[jjj];
7860                 val += PetscAbs(PetscRealPart(aa[jj]));
7861               }
7862             }
7863           }
7864           PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7865           AA[k / bs] = val;
7866         }
7867         grow = Istart / bs + brow / bs;
7868         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES));
7869       }
7870       // off-diag
7871       if (ismpiaij) {
7872         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
7873         const PetscScalar *vals;
7874         const PetscInt    *cols, *garray = aij->garray;
7875         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
7876         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
7877           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
7878           for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
7879             PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
7880             AA[k / bs] = 0;
7881             AJ[cidx]   = garray[cols[k]] / bs;
7882           }
7883           nc = ncols / bs;
7884           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
7885           if (index_size == 0) {
7886             for (int ii = 0; ii < bs; ii++) { // rows in block
7887               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7888               for (int k = 0; k < ncols; k += bs) {
7889                 for (int jj = 0; jj < bs; jj++) { // cols in block
7890                   PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7891                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7892                 }
7893               }
7894               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7895             }
7896           } else {                                       // use (index,index) value if provided
7897             for (int iii = 0; iii < index_size; iii++) { // rows in block
7898               int ii = index[iii];
7899               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7900               for (int k = 0; k < ncols; k += bs) {
7901                 for (int jjj = 0; jjj < index_size; jjj++) { // cols in block
7902                   int jj = index[jjj];
7903                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7904                 }
7905               }
7906               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7907             }
7908           }
7909           grow = Istart / bs + brow / bs;
7910           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES));
7911         }
7912       }
7913       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7914       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7915       PetscCall(PetscFree2(AA, AJ));
7916     } else {
7917       const PetscScalar *vals;
7918       const PetscInt    *idx;
7919       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
7920     old_bs:
7921       /*
7922        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7923        */
7924       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
7925       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7926       if (isseqaij) {
7927         PetscInt max_d_nnz;
7928         /*
7929          Determine exact preallocation count for (sequential) scalar matrix
7930          */
7931         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
7932         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7933         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7934         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7935         PetscCall(PetscFree3(w0, w1, w2));
7936       } else if (ismpiaij) {
7937         Mat             Daij, Oaij;
7938         const PetscInt *garray;
7939         PetscInt        max_d_nnz;
7940         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
7941         /*
7942          Determine exact preallocation count for diagonal block portion of scalar matrix
7943          */
7944         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
7945         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7946         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7947         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7948         PetscCall(PetscFree3(w0, w1, w2));
7949         /*
7950          Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
7951          */
7952         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7953           o_nnz[jj] = 0;
7954           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
7955             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7956             o_nnz[jj] += ncols;
7957             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7958           }
7959           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
7960         }
7961       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
7962       /* get scalar copy (norms) of matrix */
7963       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7964       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7965       PetscCall(PetscFree2(d_nnz, o_nnz));
7966       for (Ii = Istart; Ii < Iend; Ii++) {
7967         PetscInt dest_row = Ii / bs;
7968         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
7969         for (jj = 0; jj < ncols; jj++) {
7970           PetscInt    dest_col = idx[jj] / bs;
7971           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
7972           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
7973         }
7974         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
7975       }
7976       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7977       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7978     }
7979   } else {
7980     if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
7981     else {
7982       Gmat = Amat;
7983       PetscCall(PetscObjectReference((PetscObject)Gmat));
7984     }
7985     if (isseqaij) {
7986       a = Gmat;
7987       b = NULL;
7988     } else {
7989       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
7990       a             = d->A;
7991       b             = d->B;
7992     }
7993     if (filter >= 0 || scale) {
7994       /* take absolute value of each entry */
7995       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7996         MatInfo      info;
7997         PetscScalar *avals;
7998         PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
7999         PetscCall(MatSeqAIJGetArray(c, &avals));
8000         for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
8001         PetscCall(MatSeqAIJRestoreArray(c, &avals));
8002       }
8003     }
8004   }
8005   if (symmetrize) {
8006     PetscBool isset, issym;
8007     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
8008     if (!isset || !issym) {
8009       Mat matTrans;
8010       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
8011       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
8012       PetscCall(MatDestroy(&matTrans));
8013     }
8014     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
8015   } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
8016   if (scale) {
8017     /* scale c for all diagonal values = 1 or -1 */
8018     Vec diag;
8019     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
8020     PetscCall(MatGetDiagonal(Gmat, diag));
8021     PetscCall(VecReciprocal(diag));
8022     PetscCall(VecSqrtAbs(diag));
8023     PetscCall(MatDiagonalScale(Gmat, diag, diag));
8024     PetscCall(VecDestroy(&diag));
8025   }
8026   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
8027 
8028   if (filter >= 0) {
8029     PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
8030     PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
8031   }
8032   *a_Gmat = Gmat;
8033   PetscFunctionReturn(PETSC_SUCCESS);
8034 }
8035 
8036 /*
8037     Special version for direct calls from Fortran
8038 */
8039 
8040 /* Change these macros so can be used in void function */
8041 /* Identical to PetscCallVoid, except it assigns to *_ierr */
8042 #undef PetscCall
8043 #define PetscCall(...) \
8044   do { \
8045     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
8046     if (PetscUnlikely(ierr_msv_mpiaij)) { \
8047       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
8048       return; \
8049     } \
8050   } while (0)
8051 
8052 #undef SETERRQ
8053 #define SETERRQ(comm, ierr, ...) \
8054   do { \
8055     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
8056     return; \
8057   } while (0)
8058 
8059 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8060   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8061 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8062   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8063 #else
8064 #endif
8065 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
8066 {
8067   Mat         mat = *mmat;
8068   PetscInt    m = *mm, n = *mn;
8069   InsertMode  addv = *maddv;
8070   Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
8071   PetscScalar value;
8072 
8073   MatCheckPreallocated(mat, 1);
8074   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
8075   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
8076   {
8077     PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
8078     PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
8079     PetscBool roworiented = aij->roworiented;
8080 
8081     /* Some Variables required in the macro */
8082     Mat         A     = aij->A;
8083     Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
8084     PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
8085     MatScalar  *aa;
8086     PetscBool   ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
8087     Mat         B                 = aij->B;
8088     Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
8089     PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
8090     MatScalar  *ba;
8091     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
8092      * cannot use "#if defined" inside a macro. */
8093     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
8094 
8095     PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
8096     PetscInt   nonew = a->nonew;
8097     MatScalar *ap1, *ap2;
8098 
8099     PetscFunctionBegin;
8100     PetscCall(MatSeqAIJGetArray(A, &aa));
8101     PetscCall(MatSeqAIJGetArray(B, &ba));
8102     for (i = 0; i < m; i++) {
8103       if (im[i] < 0) continue;
8104       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
8105       if (im[i] >= rstart && im[i] < rend) {
8106         row      = im[i] - rstart;
8107         lastcol1 = -1;
8108         rp1      = aj + ai[row];
8109         ap1      = aa + ai[row];
8110         rmax1    = aimax[row];
8111         nrow1    = ailen[row];
8112         low1     = 0;
8113         high1    = nrow1;
8114         lastcol2 = -1;
8115         rp2      = bj + bi[row];
8116         ap2      = ba + bi[row];
8117         rmax2    = bimax[row];
8118         nrow2    = bilen[row];
8119         low2     = 0;
8120         high2    = nrow2;
8121 
8122         for (j = 0; j < n; j++) {
8123           if (roworiented) value = v[i * n + j];
8124           else value = v[i + j * m];
8125           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
8126           if (in[j] >= cstart && in[j] < cend) {
8127             col = in[j] - cstart;
8128             MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
8129           } else if (in[j] < 0) continue;
8130           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
8131             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
8132           } else {
8133             if (mat->was_assembled) {
8134               if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
8135 #if defined(PETSC_USE_CTABLE)
8136               PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
8137               col--;
8138 #else
8139               col = aij->colmap[in[j]] - 1;
8140 #endif
8141               if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) {
8142                 PetscCall(MatDisAssemble_MPIAIJ(mat));
8143                 col = in[j];
8144                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
8145                 B        = aij->B;
8146                 b        = (Mat_SeqAIJ *)B->data;
8147                 bimax    = b->imax;
8148                 bi       = b->i;
8149                 bilen    = b->ilen;
8150                 bj       = b->j;
8151                 rp2      = bj + bi[row];
8152                 ap2      = ba + bi[row];
8153                 rmax2    = bimax[row];
8154                 nrow2    = bilen[row];
8155                 low2     = 0;
8156                 high2    = nrow2;
8157                 bm       = aij->B->rmap->n;
8158                 ba       = b->a;
8159                 inserted = PETSC_FALSE;
8160               }
8161             } else col = in[j];
8162             MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
8163           }
8164         }
8165       } else if (!aij->donotstash) {
8166         if (roworiented) {
8167           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8168         } else {
8169           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8170         }
8171       }
8172     }
8173     PetscCall(MatSeqAIJRestoreArray(A, &aa));
8174     PetscCall(MatSeqAIJRestoreArray(B, &ba));
8175   }
8176   PetscFunctionReturnVoid();
8177 }
8178 
8179 /* Undefining these here since they were redefined from their original definition above! No
8180  * other PETSc functions should be defined past this point, as it is impossible to recover the
8181  * original definitions */
8182 #undef PetscCall
8183 #undef SETERRQ
8184