xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 5a67bb2bab7ce296578be4e8d1213f21203fd3df)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
10 {
11   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
12 
13   PetscFunctionBegin;
14   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
15   PetscCall(MatStashDestroy_Private(&mat->stash));
16   PetscCall(VecDestroy(&aij->diag));
17   PetscCall(MatDestroy(&aij->A));
18   PetscCall(MatDestroy(&aij->B));
19 #if defined(PETSC_USE_CTABLE)
20   PetscCall(PetscHMapIDestroy(&aij->colmap));
21 #else
22   PetscCall(PetscFree(aij->colmap));
23 #endif
24   PetscCall(PetscFree(aij->garray));
25   PetscCall(VecDestroy(&aij->lvec));
26   PetscCall(VecScatterDestroy(&aij->Mvctx));
27   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
28   PetscCall(PetscFree(aij->ld));
29 
30   PetscCall(PetscFree(mat->data));
31 
32   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
33   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
34 
35   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
36   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
37   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
38   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
39   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
40   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
41   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
42   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
43   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
44   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
45 #if defined(PETSC_HAVE_CUDA)
46   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
47 #endif
48 #if defined(PETSC_HAVE_HIP)
49   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
50 #endif
51 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
52   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
53 #endif
54   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
55 #if defined(PETSC_HAVE_ELEMENTAL)
56   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
57 #endif
58 #if defined(PETSC_HAVE_SCALAPACK)
59   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
60 #endif
61 #if defined(PETSC_HAVE_HYPRE)
62   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
63   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
64 #endif
65   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
66   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
69   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
71 #if defined(PETSC_HAVE_MKL_SPARSE)
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
73 #endif
74   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
75   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
76   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
77   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
78   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
79   PetscFunctionReturn(PETSC_SUCCESS);
80 }
81 
82 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and  MatAssemblyEnd_MPI_Hash() */
83 #define TYPE AIJ
84 #define TYPE_AIJ
85 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
86 #undef TYPE
87 #undef TYPE_AIJ
88 
89 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
90 {
91   Mat B;
92 
93   PetscFunctionBegin;
94   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
95   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
96   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
97   PetscCall(MatDestroy(&B));
98   PetscFunctionReturn(PETSC_SUCCESS);
99 }
100 
101 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
102 {
103   Mat B;
104 
105   PetscFunctionBegin;
106   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
107   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
108   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
109   PetscFunctionReturn(PETSC_SUCCESS);
110 }
111 
112 /*MC
113    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
114 
   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
116    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
117   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
118   for communicators controlling multiple processes.  It is recommended that you call both of
119   the above preallocation routines for simplicity.
120 
121    Options Database Key:
122 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
123 
  Level: beginner

  Developer Note:
  Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`, and this type also automatically switches over to use inodes when
  enough exist.
129 
130 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
131 M*/
132 
133 /*MC
134    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
135 
136    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
137    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
138    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
139   for communicators controlling multiple processes.  It is recommended that you call both of
140   the above preallocation routines for simplicity.
141 
142    Options Database Key:
143 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`
144 
145   Level: beginner
146 
147 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
148 M*/
149 
150 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
151 {
152   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
153 
154   PetscFunctionBegin;
155 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
156   A->boundtocpu = flg;
157 #endif
158   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
159   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
160 
161   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
162    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
163    * to differ from the parent matrix. */
164   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
165   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
166 
167   PetscFunctionReturn(PETSC_SUCCESS);
168 }
169 
170 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
171 {
172   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
173 
174   PetscFunctionBegin;
175   if (mat->A) {
176     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
177     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
178   }
179   PetscFunctionReturn(PETSC_SUCCESS);
180 }
181 
/*
  MatFindNonzeroRows_MPIAIJ - builds an IS of the global indices of this
  process's rows that contain at least one stored value that is nonzero.
  If no process has an all-zero row, *keptrows is returned as NULL
  (meaning every row is kept).
*/
static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  /* first pass: count (in cnt) the locally owned rows with no nonzero value */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) {
      /* no stored entries at all in this row */
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1; /* nonzero found in the diagonal block */
    }
    bb = bav ? bav + ib[i] : NULL; /* bav may be NULL when B has no entries */
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1; /* nonzero found in the off-diagonal block */
    }
    cnt++; /* row has stored entries but all are explicit zeros */
  ok1:;
  }
  /* n0rows = total number of zero rows across all processes */
  PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    /* no zero rows anywhere: leave *keptrows NULL */
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  /* second pass: collect the global indices of the m - cnt nonzero rows */
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i; /* record global row index */
        goto ok2;
      }
    }
    bb = bav ? bav + ib[i] : NULL;
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  /* the IS takes ownership of rows (PETSC_OWN_POINTER) */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(PETSC_SUCCESS);
}
250 
251 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
252 {
253   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
254   PetscBool   cong;
255 
256   PetscFunctionBegin;
257   PetscCall(MatHasCongruentLayouts(Y, &cong));
258   if (Y->assembled && cong) {
259     PetscCall(MatDiagonalSet(aij->A, D, is));
260   } else {
261     PetscCall(MatDiagonalSet_Default(Y, D, is));
262   }
263   PetscFunctionReturn(PETSC_SUCCESS);
264 }
265 
266 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
267 {
268   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
269   PetscInt    i, rstart, nrows, *rows;
270 
271   PetscFunctionBegin;
272   *zrows = NULL;
273   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
274   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
275   for (i = 0; i < nrows; i++) rows[i] += rstart;
276   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
277   PetscFunctionReturn(PETSC_SUCCESS);
278 }
279 
/*
  MatGetColumnReductions_MPIAIJ - computes a per-column reduction (1/2/inf
  norm, sum or mean of real/imaginary parts) over all rows.

  reductions must have length equal to the global number of columns; a work
  array of that same global length is allocated on every process, so this is
  not scalable in the number of columns.
*/
static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  /* NOTE(review): get/restore with no use of the array in between — presumably
     done to make the host copies of the values current before a_aij->a and
     b_aij->a are read directly below; confirm against MatSeqAIJGetArrayRead() */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  /* Diagonal-block columns are mapped with the column ownership offset; the
     off-diagonal block stores local column indices that garray maps back to
     global.  For NORM_2 the squared magnitudes are accumulated (|z*z| == |z|^2)
     and the square root is taken at the end. */
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  /* combine the per-process partial results: max for the inf-norm, sum otherwise */
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m; /* mean over the global number of rows */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
325 
326 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
327 {
328   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
329   IS              sis, gis;
330   const PetscInt *isis, *igis;
331   PetscInt        n, *iis, nsis, ngis, rstart, i;
332 
333   PetscFunctionBegin;
334   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
335   PetscCall(MatFindNonzeroRows(a->B, &gis));
336   PetscCall(ISGetSize(gis, &ngis));
337   PetscCall(ISGetSize(sis, &nsis));
338   PetscCall(ISGetIndices(sis, &isis));
339   PetscCall(ISGetIndices(gis, &igis));
340 
341   PetscCall(PetscMalloc1(ngis + nsis, &iis));
342   PetscCall(PetscArraycpy(iis, igis, ngis));
343   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
344   n = ngis + nsis;
345   PetscCall(PetscSortRemoveDupsInt(&n, iis));
346   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
347   for (i = 0; i < n; i++) iis[i] += rstart;
348   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
349 
350   PetscCall(ISRestoreIndices(sis, &isis));
351   PetscCall(ISRestoreIndices(gis, &igis));
352   PetscCall(ISDestroy(&sis));
353   PetscCall(ISDestroy(&gis));
354   PetscFunctionReturn(PETSC_SUCCESS);
355 }
356 
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each
  process has an order-N integer array) but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n   = aij->B->cmap->n, i; /* number of off-diagonal columns actually used */

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* keys and values are stored shifted by one so a lookup result of 0 can mean "not present" */
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  /* dense zero-initialized table over all global columns; entries hold local index + 1, 0 means "not present" */
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
380 
/*
  MatSetValues_SeqAIJ_A_Private - inserts or adds a single (value) at local
  (row, col) of the diagonal block A.  Relies on locals declared in
  MatSetValues_MPIAIJ(): rp1/ap1 (column-index/value arrays of the current
  row), nrow1/rmax1 (used/allocated row lengths), low1/high1/lastcol1
  (search-window state carried across calls for monotone column sequences),
  plus a, aa, ai, aj, aimax, ailen, nonew, ignorezeroentries, am, N, _i, t.
  orow/ocol are the original global indices, used only in error messages.
*/
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    /* binary search narrows the window; the linear scan below finishes it */ \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    /* skip creating a new entry for an explicit zero (off-diagonal only) when requested */ \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    /* nonew == 1: silently skip insertion of new nonzero locations */ \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)
426 
/*
  MatSetValues_SeqAIJ_B_Private - same as MatSetValues_SeqAIJ_A_Private() but
  for the off-diagonal block B, using the parallel rp2/ap2/nrow2/rmax2/
  low2/high2/lastcol2 state and b, ba, bi, bj, bimax, bilen, bm locals.
  Unlike the A variant, explicit zeros are skipped regardless of row == col,
  since the diagonal never lies in B.
*/
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    /* binary search narrows the window; the linear scan below finishes it */ \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    /* skip creating a new entry for an explicit zero when requested */ \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    /* nonew == 1: silently skip insertion of new nonzero locations */ \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  } while (0)
471 
/*
  MatSetValuesRow_MPIAIJ - replaces the stored values of one locally owned row.

  v must contain the row's values ordered by ascending global column number:
  first the off-diagonal entries left of the diagonal block, then the
  diagonal-block entries, then the remaining off-diagonal entries.  Only
  existing locations are overwritten; the sparsity pattern is unchanged.
*/
static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray                         = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag; /* convert global row number to local */
  /* NOTE(review): comparing garray values against the row ownership start relies
     on the square-matrix assumption above (row start == column start) — confirm */
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  /* copy the l off-diagonal entries left of the diagonal block */
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
509 
/*
  MatSetValues_MPIAIJ - inserts or adds an m x n logically dense block of
  values into the matrix.  Locally owned rows go straight into the sequential
  diagonal (A) or off-diagonal (B) block via the MatSetValues_SeqAIJ_[AB]_Private()
  macros; rows owned by other processes are queued in the stash and exchanged
  at assembly time.  Negative row/column indices are ignored.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A     = aij->A;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B                 = aij->B;
  Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are silently ignored */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the search state for this row in both blocks */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj ? aj + ai[row] : NULL;
      ap1      = aa ? aa + ai[row] : NULL;
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj ? bj + bi[row] : NULL;
      ap2      = ba ? ba + bi[row] : NULL;
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        /* v may be NULL (pattern-only insertion); pick row- or column-major ordering */
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* column lies in the diagonal block: use local column index */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue; /* negative columns are silently ignored */
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            /* after an assembly B stores compacted local column indices; translate via colmap */
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              /* new nonzero in B but new nonzeros are disallowed: info message or hard error */
              if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* before first assembly B stores global column indices directly */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      /* off-process row: queue the values in the stash for assembly-time communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v ? v + i * n : NULL, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v ? v + i : NULL, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}
618 
619 /*
620     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
621     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
623 */
624 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
625 {
626   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
627   Mat         A      = aij->A; /* diagonal part of the matrix */
628   Mat         B      = aij->B; /* off-diagonal part of the matrix */
629   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
630   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
631   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
632   PetscInt   *ailen = a->ilen, *aj = a->j;
633   PetscInt   *bilen = b->ilen, *bj = b->j;
634   PetscInt    am          = aij->A->rmap->n, j;
635   PetscInt    diag_so_far = 0, dnz;
636   PetscInt    offd_so_far = 0, onz;
637 
638   PetscFunctionBegin;
639   /* Iterate over all rows of the matrix */
640   for (j = 0; j < am; j++) {
641     dnz = onz = 0;
642     /*  Iterate over all non-zero columns of the current row */
643     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
644       /* If column is in the diagonal */
645       if (mat_j[col] >= cstart && mat_j[col] < cend) {
646         aj[diag_so_far++] = mat_j[col] - cstart;
647         dnz++;
648       } else { /* off-diagonal entries */
649         bj[offd_so_far++] = mat_j[col];
650         onz++;
651       }
652     }
653     ailen[j] = dnz;
654     bilen[j] = onz;
655   }
656   PetscFunctionReturn(PETSC_SUCCESS);
657 }
658 
659 /*
660     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
661     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
663     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
664     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
665 */
666 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
667 {
668   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
669   Mat          A    = aij->A; /* diagonal part of the matrix */
670   Mat          B    = aij->B; /* off-diagonal part of the matrix */
671   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
672   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
673   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
674   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
675   PetscInt    *ailen = a->ilen, *aj = a->j;
676   PetscInt    *bilen = b->ilen, *bj = b->j;
677   PetscInt     am          = aij->A->rmap->n, j;
678   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
679   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
680   PetscScalar *aa = a->a, *ba = b->a;
681 
682   PetscFunctionBegin;
683   /* Iterate over all rows of the matrix */
684   for (j = 0; j < am; j++) {
685     dnz_row = onz_row = 0;
686     rowstart_offd     = full_offd_i[j];
687     rowstart_diag     = full_diag_i[j];
688     /*  Iterate over all non-zero columns of the current row */
689     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
690       /* If column is in the diagonal */
691       if (mat_j[col] >= cstart && mat_j[col] < cend) {
692         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
693         aa[rowstart_diag + dnz_row] = mat_a[col];
694         dnz_row++;
695       } else { /* off-diagonal entries */
696         bj[rowstart_offd + onz_row] = mat_j[col];
697         ba[rowstart_offd + onz_row] = mat_a[col];
698         onz_row++;
699       }
700     }
701     ailen[j] = dnz_row;
702     bilen[j] = onz_row;
703   }
704   PetscFunctionReturn(PETSC_SUCCESS);
705 }
706 
/*
  MatGetValues() implementation for MPIAIJ: fills v (row-major, m x n) with the entries
  at rows idxm[] and columns idxn[] (global indices).

  Only rows owned by this rank are supported; a non-local row raises PETSC_ERR_SUP.
  Negative row/column indices are skipped, leaving the corresponding v entries untouched.
  Columns inside the local column range are read from the diagonal block A; all other
  columns are translated via colmap/garray and read from the off-diagonal block B, with
  columns not present in B returned as 0.0.
*/
static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* locally owned column: read from the diagonal block with a local index */
          col = idxn[j] - cstart;
          PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          /* off-diagonal column: map the global column to a local column of B */
          if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
          col--; /* colmap stores index+1 so that 0 can signal "absent" */
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* column not present in the off-diagonal block => the entry is 0.0 */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
          else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
741 
742 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
743 {
744   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
745   PetscInt    nstash, reallocs;
746 
747   PetscFunctionBegin;
748   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
749 
750   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
751   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
752   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
753   PetscFunctionReturn(PETSC_SUCCESS);
754 }
755 
/*
  End assembly: drain the stash of off-process entries, assemble both local blocks,
  handle global disassembly/reassembly of the off-diagonal block, set up the
  matrix-vector scatter on first assembly, and update the global nonzero state.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* Receive every message of stashed entries and insert them via MatSetValues_MPIAIJ */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  /* first final assembly: build the column map and scatter used by MatMult and friends */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  /* drop cached row-access workspace; it is rebuilt lazily when needed */
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  /* cached diagonal is stale after assembly */
  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
834 
835 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
836 {
837   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
838 
839   PetscFunctionBegin;
840   PetscCall(MatZeroEntries(l->A));
841   PetscCall(MatZeroEntries(l->B));
842   PetscFunctionReturn(PETSC_SUCCESS);
843 }
844 
/*
  Zero the given (global) rows, optionally placing 'diag' on the diagonal of those rows
  and, when x and b are supplied, fixing the right-hand side so b = diag*x on those rows.
*/
static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)A->data;
  PetscObjectState sA, sB; /* nonzero states before zeroing, compared afterwards to detect pattern changes */
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* congruent layouts: the diagonal entry of each zeroed row lives in the diagonal block */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    nnwA, nnwB; /* saved 'nonew' flags, restored after the diagonal insertion */
    PetscBool   nnzA, nnzB; /* keepnonzeropattern flags of each block */

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0; /* temporarily allow new nonzeros so the diagonal can be inserted */
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue; /* rows past the column range have no diagonal entry */
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    /* diag == 0: just zero the rows of both blocks */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(PETSC_SUCCESS);
}
918 
/*
  Zero the given (global) rows AND the corresponding columns. The diagonal block is
  handled by MatZeroRowsColumns on l->A; the off-diagonal block is handled manually by
  scattering a 0/1 column mask to each rank and zeroing masked entries, optionally
  updating b (b -= a_ij * x_j for eliminated columns j) when x and b are supplied.
*/
static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n;
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off-diagonal part of matrix */
  /* build a 0/1 mask over the ghost columns: 1 where the column must be eliminated */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    /* ghost values of x are needed to update b for eliminated off-diagonal columns */
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off-diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    /* compressed-row storage: only rows with nonzeros are stored; ridx maps back to true row */
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1036 
1037 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1038 {
1039   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1040   PetscInt    nt;
1041   VecScatter  Mvctx = a->Mvctx;
1042 
1043   PetscFunctionBegin;
1044   PetscCall(VecGetLocalSize(xx, &nt));
1045   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
1046   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1047   PetscUseTypeMethod(a->A, mult, xx, yy);
1048   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1049   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1050   PetscFunctionReturn(PETSC_SUCCESS);
1051 }
1052 
1053 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1054 {
1055   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1056 
1057   PetscFunctionBegin;
1058   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1059   PetscFunctionReturn(PETSC_SUCCESS);
1060 }
1061 
1062 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1063 {
1064   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1065   VecScatter  Mvctx = a->Mvctx;
1066 
1067   PetscFunctionBegin;
1068   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1069   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1070   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1071   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1072   PetscFunctionReturn(PETSC_SUCCESS);
1073 }
1074 
1075 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1076 {
1077   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1078 
1079   PetscFunctionBegin;
1080   /* do nondiagonal part */
1081   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1082   /* do local part */
1083   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1084   /* add partial results together */
1085   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1086   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1087   PetscFunctionReturn(PETSC_SUCCESS);
1088 }
1089 
/*
  Tests whether Bmat equals Amat^T to within tolerance tol, setting *f collectively.
  First a cheap test (are the diagonal blocks mutual transposes on every rank?); only
  if that passes, the off-diagonal parts are compared via two MatCreateSubMatrices calls.
*/
static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
  Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme;
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  /* NOTE(review): notme is allocated with N - last + first entries, but the two loops
     below write first + (M - last) entries; these counts agree only when M == N.
     Presumably callers guarantee square matrices here — confirm. */
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  /* compare my off-diagonal strip of Amat with the transposed strip of Bmat */
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1129 
/* A matrix is symmetric iff it equals its own transpose; reuse the parallel transpose test. */
static PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f)
{
  PetscFunctionBegin;
  PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1136 
1137 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1138 {
1139   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1140 
1141   PetscFunctionBegin;
1142   /* do nondiagonal part */
1143   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1144   /* do local part */
1145   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1146   /* add partial results together */
1147   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1148   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1149   PetscFunctionReturn(PETSC_SUCCESS);
1150 }
1151 
1152 /*
1153   This only works correctly for square matrices where the subblock A->A is the
1154    diagonal block
1155 */
1156 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1157 {
1158   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1159 
1160   PetscFunctionBegin;
1161   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1162   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1163   PetscCall(MatGetDiagonal(a->A, v));
1164   PetscFunctionReturn(PETSC_SUCCESS);
1165 }
1166 
1167 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1168 {
1169   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1170 
1171   PetscFunctionBegin;
1172   PetscCall(MatScale(a->A, aa));
1173   PetscCall(MatScale(a->B, aa));
1174   PetscFunctionReturn(PETSC_SUCCESS);
1175 }
1176 
1177 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1178 {
1179   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1180   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1181   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1182   const PetscInt    *garray = aij->garray;
1183   const PetscScalar *aa, *ba;
1184   PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
1185   PetscInt64         nz, hnz;
1186   PetscInt          *rowlens;
1187   PetscInt          *colidxs;
1188   PetscScalar       *matvals;
1189   PetscMPIInt        rank;
1190 
1191   PetscFunctionBegin;
1192   PetscCall(PetscViewerSetUp(viewer));
1193 
1194   M  = mat->rmap->N;
1195   N  = mat->cmap->N;
1196   m  = mat->rmap->n;
1197   rs = mat->rmap->rstart;
1198   cs = mat->cmap->rstart;
1199   nz = A->nz + B->nz;
1200 
1201   /* write matrix header */
1202   header[0] = MAT_FILE_CLASSID;
1203   header[1] = M;
1204   header[2] = N;
1205   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1206   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1207   if (rank == 0) {
1208     if (hnz > PETSC_MAX_INT) header[3] = PETSC_MAX_INT;
1209     else header[3] = (PetscInt)hnz;
1210   }
1211   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1212 
1213   /* fill in and store row lengths  */
1214   PetscCall(PetscMalloc1(m, &rowlens));
1215   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1216   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1217   PetscCall(PetscFree(rowlens));
1218 
1219   /* fill in and store column indices */
1220   PetscCall(PetscMalloc1(nz, &colidxs));
1221   for (cnt = 0, i = 0; i < m; i++) {
1222     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1223       if (garray[B->j[jb]] > cs) break;
1224       colidxs[cnt++] = garray[B->j[jb]];
1225     }
1226     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1227     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1228   }
1229   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1230   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1231   PetscCall(PetscFree(colidxs));
1232 
1233   /* fill in and store nonzero values */
1234   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1235   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1236   PetscCall(PetscMalloc1(nz, &matvals));
1237   for (cnt = 0, i = 0; i < m; i++) {
1238     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1239       if (garray[B->j[jb]] > cs) break;
1240       matvals[cnt++] = ba[jb];
1241     }
1242     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1243     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1244   }
1245   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1246   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1247   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1248   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1249   PetscCall(PetscFree(matvals));
1250 
1251   /* write block size option to the viewer's .info file */
1252   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1253   PetscFunctionReturn(PETSC_SUCCESS);
1254 }
1255 
1256 #include <petscdraw.h>
/*
  Views an MPIAIJ matrix on ASCII, draw, socket, or binary viewers. Info-style ASCII
  formats and parallel binary are handled directly; all other cases assemble the
  entire matrix onto rank 0 and view it there with the sequential viewer code.
*/
static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt       rank = aij->rank, size = aij->size;
  PetscBool         isdraw, iascii, isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report the min/avg/max number of local nonzeros across ranks */
      PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < (PetscInt)size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    /* NOTE(review): this second PetscViewerGetFormat call is redundant — format was
       already fetched above and cannot have changed in between */
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank nonzero/memory statistics plus the Mvctx scatter description */
      MatInfo   info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (iascii && size == 1) {
    /* NOTE(review): this branch appears unreachable — when iascii is true the first
       branch of the chain is taken; confirm whether it was meant to run for default
       ASCII formats on one rank (the rank-0 gather below handles that case anyway) */
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow, iscol;

    /* rank 0 requests all rows/columns; every other rank requests none */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
    /*  The commented code uses MatCreateSubMatrices instead */
    /*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
    if (rank == 0) {
       PetscCall(PetscObjectReference((PetscObject)AA[0]));
       A    = AA[0];
       Av   = AA[0];
    }
    PetscCall(MatDestroySubMatrices(1,&AA));
*/
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqAIJ(Av, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1381 
1382 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1383 {
1384   PetscBool iascii, isdraw, issocket, isbinary;
1385 
1386   PetscFunctionBegin;
1387   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1388   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1389   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1390   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1391   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1392   PetscFunctionReturn(PETSC_SUCCESS);
1393 }
1394 
/*
   MatSOR_MPIAIJ - Processor-local SOR relaxation for MPIAIJ matrices.

   Only the SOR_LOCAL_* variants (block Jacobi with an SOR sweep on each
   process's diagonal block) and SOR_EISENSTAT are supported; a true global
   parallel SOR is not implemented (see the final SETERRQ).

   Each outer iteration folds the off-process coupling into the right-hand
   side (bb1 = bb - B*x) and then runs a sequential sweep on the diagonal
   block A via its ops->sor method.
*/
static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec         bb1 = NULL; /* work vector for the modified rhs; allocated only when needed */
  PetscBool   hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    /* apply-upper involves only the diagonal block; delegate directly */
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* bb1 is unnecessary only for a single zero-initial-guess, non-Eisenstat sweep.
     Note: ~flag & SOR_ZERO_INITIAL_GUESS parses as (~flag) & ..., i.e. "the
     zero-initial-guess bit is NOT set". */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* x starts at zero, so no ghost values are needed for the first sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      /* gather off-process entries of x into the local ghost vector */
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    /* backward sweep starting from a zero initial guess */
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    if (!mat->diag) {
      /* lazily create and cache the diagonal for reuse on later calls */
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    /* bb1 = bb + ((omega - 2)/omega) * D*x */
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  /* propagate any factorization error (e.g. zero pivot) from the local solve */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1491 
/*
   MatPermute_MPIAIJ - Builds B = P_r * A * P_c for row/column permutations
   given as index sets rowp/colp.

   Strategy: use star forests (PetscSF) to invert the row and column
   permutations (finding where each local row/column must go), count the
   diagonal/off-diagonal nonzeros each destination row will receive, then
   preallocate and fill the permuted matrix with MatSetValues().
*/
static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL;
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  /* work is shared scratch, rdest/cdest receive the inverted permutations */
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* count diagonal (dnnz) and off-diagonal (onnz) entries each source row contributes,
     then forward those counts to the destination rows (tdnnz/tonnz) */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1597 
1598 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1599 {
1600   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1601 
1602   PetscFunctionBegin;
1603   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1604   if (ghosts) *ghosts = aij->garray;
1605   PetscFunctionReturn(PETSC_SUCCESS);
1606 }
1607 
1608 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1609 {
1610   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1611   Mat            A = mat->A, B = mat->B;
1612   PetscLogDouble isend[5], irecv[5];
1613 
1614   PetscFunctionBegin;
1615   info->block_size = 1.0;
1616   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1617 
1618   isend[0] = info->nz_used;
1619   isend[1] = info->nz_allocated;
1620   isend[2] = info->nz_unneeded;
1621   isend[3] = info->memory;
1622   isend[4] = info->mallocs;
1623 
1624   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1625 
1626   isend[0] += info->nz_used;
1627   isend[1] += info->nz_allocated;
1628   isend[2] += info->nz_unneeded;
1629   isend[3] += info->memory;
1630   isend[4] += info->mallocs;
1631   if (flag == MAT_LOCAL) {
1632     info->nz_used      = isend[0];
1633     info->nz_allocated = isend[1];
1634     info->nz_unneeded  = isend[2];
1635     info->memory       = isend[3];
1636     info->mallocs      = isend[4];
1637   } else if (flag == MAT_GLOBAL_MAX) {
1638     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1639 
1640     info->nz_used      = irecv[0];
1641     info->nz_allocated = irecv[1];
1642     info->nz_unneeded  = irecv[2];
1643     info->memory       = irecv[3];
1644     info->mallocs      = irecv[4];
1645   } else if (flag == MAT_GLOBAL_SUM) {
1646     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1647 
1648     info->nz_used      = irecv[0];
1649     info->nz_allocated = irecv[1];
1650     info->nz_unneeded  = irecv[2];
1651     info->memory       = irecv[3];
1652     info->mallocs      = irecv[4];
1653   }
1654   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1655   info->fill_ratio_needed = 0;
1656   info->factor_mallocs    = 0;
1657   PetscFunctionReturn(PETSC_SUCCESS);
1658 }
1659 
1660 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
1661 {
1662   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1663 
1664   PetscFunctionBegin;
1665   switch (op) {
1666   case MAT_NEW_NONZERO_LOCATIONS:
1667   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1668   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1669   case MAT_KEEP_NONZERO_PATTERN:
1670   case MAT_NEW_NONZERO_LOCATION_ERR:
1671   case MAT_USE_INODES:
1672   case MAT_IGNORE_ZERO_ENTRIES:
1673   case MAT_FORM_EXPLICIT_TRANSPOSE:
1674     MatCheckPreallocated(A, 1);
1675     PetscCall(MatSetOption(a->A, op, flg));
1676     PetscCall(MatSetOption(a->B, op, flg));
1677     break;
1678   case MAT_ROW_ORIENTED:
1679     MatCheckPreallocated(A, 1);
1680     a->roworiented = flg;
1681 
1682     PetscCall(MatSetOption(a->A, op, flg));
1683     PetscCall(MatSetOption(a->B, op, flg));
1684     break;
1685   case MAT_FORCE_DIAGONAL_ENTRIES:
1686   case MAT_SORTED_FULL:
1687     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1688     break;
1689   case MAT_IGNORE_OFF_PROC_ENTRIES:
1690     a->donotstash = flg;
1691     break;
1692   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1693   case MAT_SPD:
1694   case MAT_SYMMETRIC:
1695   case MAT_STRUCTURALLY_SYMMETRIC:
1696   case MAT_HERMITIAN:
1697   case MAT_SYMMETRY_ETERNAL:
1698   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1699   case MAT_SPD_ETERNAL:
1700     /* if the diagonal matrix is square it inherits some of the properties above */
1701     break;
1702   case MAT_SUBMAT_SINGLEIS:
1703     A->submat_singleis = flg;
1704     break;
1705   case MAT_STRUCTURE_ONLY:
1706     /* The option is handled directly by MatSetOption() */
1707     break;
1708   default:
1709     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1710   }
1711   PetscFunctionReturn(PETSC_SUCCESS);
1712 }
1713 
/*
   MatGetRow_MPIAIJ - Returns one locally-owned row of the parallel matrix
   with global column indices, merging the diagonal-block (A) and
   off-diagonal-block (B) entries in increasing global column order.

   The merged row is staged in mat->rowvalues / mat->rowindices, which are
   sized once to hold the longest local row; MatRestoreRow_MPIAIJ() must be
   called before the next MatGetRow() (enforced via mat->getrowactive).
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
  PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    max = 1, tmp;
    for (i = 0; i < matin->rmap->n; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  /* pass NULL for whichever outputs (values/indices) the caller did not request */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  /* cmap translates B's compressed local column indices to global indices */
  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column lies before the diagonal block */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already computed by the values loop above */
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1797 
1798 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1799 {
1800   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1801 
1802   PetscFunctionBegin;
1803   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1804   aij->getrowactive = PETSC_FALSE;
1805   PetscFunctionReturn(PETSC_SUCCESS);
1806 }
1807 
1808 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
1809 {
1810   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
1811   Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
1812   PetscInt         i, j, cstart = mat->cmap->rstart;
1813   PetscReal        sum = 0.0;
1814   const MatScalar *v, *amata, *bmata;
1815 
1816   PetscFunctionBegin;
1817   if (aij->size == 1) {
1818     PetscCall(MatNorm(aij->A, type, norm));
1819   } else {
1820     PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
1821     PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
1822     if (type == NORM_FROBENIUS) {
1823       v = amata;
1824       for (i = 0; i < amat->nz; i++) {
1825         sum += PetscRealPart(PetscConj(*v) * (*v));
1826         v++;
1827       }
1828       v = bmata;
1829       for (i = 0; i < bmat->nz; i++) {
1830         sum += PetscRealPart(PetscConj(*v) * (*v));
1831         v++;
1832       }
1833       PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1834       *norm = PetscSqrtReal(*norm);
1835       PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
1836     } else if (type == NORM_1) { /* max column norm */
1837       PetscReal *tmp, *tmp2;
1838       PetscInt  *jj, *garray = aij->garray;
1839       PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
1840       PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
1841       *norm = 0.0;
1842       v     = amata;
1843       jj    = amat->j;
1844       for (j = 0; j < amat->nz; j++) {
1845         tmp[cstart + *jj++] += PetscAbsScalar(*v);
1846         v++;
1847       }
1848       v  = bmata;
1849       jj = bmat->j;
1850       for (j = 0; j < bmat->nz; j++) {
1851         tmp[garray[*jj++]] += PetscAbsScalar(*v);
1852         v++;
1853       }
1854       PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1855       for (j = 0; j < mat->cmap->N; j++) {
1856         if (tmp2[j] > *norm) *norm = tmp2[j];
1857       }
1858       PetscCall(PetscFree(tmp));
1859       PetscCall(PetscFree(tmp2));
1860       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1861     } else if (type == NORM_INFINITY) { /* max row norm */
1862       PetscReal ntemp = 0.0;
1863       for (j = 0; j < aij->A->rmap->n; j++) {
1864         v   = amata + amat->i[j];
1865         sum = 0.0;
1866         for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
1867           sum += PetscAbsScalar(*v);
1868           v++;
1869         }
1870         v = bmata + bmat->i[j];
1871         for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
1872           sum += PetscAbsScalar(*v);
1873           v++;
1874         }
1875         if (sum > ntemp) ntemp = sum;
1876       }
1877       PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
1878       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1879     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
1880     PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
1881     PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
1882   }
1883   PetscFunctionReturn(PETSC_SUCCESS);
1884 }
1885 
/*
   MatTranspose_MPIAIJ - Forms the transpose of a parallel AIJ matrix.

   The diagonal block is transposed in place into the result's diagonal block
   (all writes local, no MatSetValues), while the off-diagonal block is
   communicated entry-by-entry through MatSetValues with INSERT_VALUES.
   Preallocation for MAT_INITIAL_MATRIX (or in-place, *matout == A) is
   computed by counting column occurrences and reducing the off-diagonal
   counts over a star forest.
*/
static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
  Mat              B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  ma = A->rmap->n;
  na = A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt            *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode         *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    /* transpose has swapped row/column layouts and block sizes */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    B = *matout;
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ *)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];

  /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  /* translate compressed local column indices to global column indices */
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    /* each source row becomes a column of the transpose */
    ncol = bi[i + 1] - bi[i];
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    if (pbv) pbv += ncol;
    if (cols_tmp) cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX: replace A's contents with the transpose */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1979 
1980 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
1981 {
1982   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1983   Mat         a = aij->A, b = aij->B;
1984   PetscInt    s1, s2, s3;
1985 
1986   PetscFunctionBegin;
1987   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1988   if (rr) {
1989     PetscCall(VecGetLocalSize(rr, &s1));
1990     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1991     /* Overlap communication with computation. */
1992     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1993   }
1994   if (ll) {
1995     PetscCall(VecGetLocalSize(ll, &s1));
1996     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1997     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1998   }
1999   /* scale  the diagonal block */
2000   PetscUseTypeMethod(a, diagonalscale, ll, rr);
2001 
2002   if (rr) {
2003     /* Do a scatter end and then right scale the off-diagonal block */
2004     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
2005     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
2006   }
2007   PetscFunctionReturn(PETSC_SUCCESS);
2008 }
2009 
2010 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2011 {
2012   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2013 
2014   PetscFunctionBegin;
2015   PetscCall(MatSetUnfactored(a->A));
2016   PetscFunctionReturn(PETSC_SUCCESS);
2017 }
2018 
2019 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2020 {
2021   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2022   Mat         a, b, c, d;
2023   PetscBool   flg;
2024 
2025   PetscFunctionBegin;
2026   a = matA->A;
2027   b = matA->B;
2028   c = matB->A;
2029   d = matB->B;
2030 
2031   PetscCall(MatEqual(a, c, &flg));
2032   if (flg) PetscCall(MatEqual(b, d, &flg));
2033   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2034   PetscFunctionReturn(PETSC_SUCCESS);
2035 }
2036 
2037 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2038 {
2039   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2040   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2041 
2042   PetscFunctionBegin;
2043   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2044   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2045     /* because of the column compression in the off-processor part of the matrix a->B,
2046        the number of columns in a->B and b->B may be different, hence we cannot call
2047        the MatCopy() directly on the two parts. If need be, we can provide a more
2048        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2049        then copying the submatrices */
2050     PetscCall(MatCopy_Basic(A, B, str));
2051   } else {
2052     PetscCall(MatCopy(a->A, b->A, str));
2053     PetscCall(MatCopy(a->B, b->B, str));
2054   }
2055   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2056   PetscFunctionReturn(PETSC_SUCCESS);
2057 }
2058 
2059 /*
2060    Computes the number of nonzeros per row needed for preallocation when X and Y
2061    have different nonzero structure.
2062 */
2063 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2064 {
2065   PetscInt i, j, k, nzx, nzy;
2066 
2067   PetscFunctionBegin;
2068   /* Set the number of nonzeros in the new matrix */
2069   for (i = 0; i < m; i++) {
2070     const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i];
2071     nzx    = xi[i + 1] - xi[i];
2072     nzy    = yi[i + 1] - yi[i];
2073     nnz[i] = 0;
2074     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2075       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2076       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2077       nnz[i]++;
2078     }
2079     for (; k < nzy; k++) nnz[i]++;
2080   }
2081   PetscFunctionReturn(PETSC_SUCCESS);
2082 }
2083 
2084 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2085 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2086 {
2087   PetscInt    m = Y->rmap->N;
2088   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2089   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2090 
2091   PetscFunctionBegin;
2092   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2093   PetscFunctionReturn(PETSC_SUCCESS);
2094 }
2095 
2096 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2097 {
2098   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;
2099 
2100   PetscFunctionBegin;
2101   if (str == SAME_NONZERO_PATTERN) {
2102     PetscCall(MatAXPY(yy->A, a, xx->A, str));
2103     PetscCall(MatAXPY(yy->B, a, xx->B, str));
2104   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2105     PetscCall(MatAXPY_Basic(Y, a, X, str));
2106   } else {
2107     Mat       B;
2108     PetscInt *nnz_d, *nnz_o;
2109 
2110     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
2111     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
2112     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2113     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2114     PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
2115     PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
2116     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
2117     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
2118     PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
2119     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2120     PetscCall(MatHeaderMerge(Y, &B));
2121     PetscCall(PetscFree(nnz_d));
2122     PetscCall(PetscFree(nnz_o));
2123   }
2124   PetscFunctionReturn(PETSC_SUCCESS);
2125 }
2126 
2127 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2128 
2129 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2130 {
2131   PetscFunctionBegin;
2132   if (PetscDefined(USE_COMPLEX)) {
2133     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2134 
2135     PetscCall(MatConjugate_SeqAIJ(aij->A));
2136     PetscCall(MatConjugate_SeqAIJ(aij->B));
2137   }
2138   PetscFunctionReturn(PETSC_SUCCESS);
2139 }
2140 
2141 static PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2142 {
2143   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2144 
2145   PetscFunctionBegin;
2146   PetscCall(MatRealPart(a->A));
2147   PetscCall(MatRealPart(a->B));
2148   PetscFunctionReturn(PETSC_SUCCESS);
2149 }
2150 
2151 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2152 {
2153   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2154 
2155   PetscFunctionBegin;
2156   PetscCall(MatImaginaryPart(a->A));
2157   PetscCall(MatImaginaryPart(a->B));
2158   PetscFunctionReturn(PETSC_SUCCESS);
2159 }
2160 
2161 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2162 {
2163   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2164   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2165   PetscScalar       *va, *vv;
2166   Vec                vB, vA;
2167   const PetscScalar *vb;
2168 
2169   PetscFunctionBegin;
2170   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
2171   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2172 
2173   PetscCall(VecGetArrayWrite(vA, &va));
2174   if (idx) {
2175     for (i = 0; i < m; i++) {
2176       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2177     }
2178   }
2179 
2180   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
2181   PetscCall(PetscMalloc1(m, &idxb));
2182   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2183 
2184   PetscCall(VecGetArrayWrite(v, &vv));
2185   PetscCall(VecGetArrayRead(vB, &vb));
2186   for (i = 0; i < m; i++) {
2187     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2188       vv[i] = vb[i];
2189       if (idx) idx[i] = a->garray[idxb[i]];
2190     } else {
2191       vv[i] = va[i];
2192       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2193     }
2194   }
2195   PetscCall(VecRestoreArrayWrite(vA, &vv));
2196   PetscCall(VecRestoreArrayWrite(vA, &va));
2197   PetscCall(VecRestoreArrayRead(vB, &vb));
2198   PetscCall(PetscFree(idxb));
2199   PetscCall(VecDestroy(&vA));
2200   PetscCall(VecDestroy(&vB));
2201   PetscFunctionReturn(PETSC_SUCCESS);
2202 }
2203 
/*
  MatGetRowMinAbs_MPIAIJ - for each local row, find the entry of smallest
  absolute value; implicit zeros of the off-diagonal block count as 0.0.

  Output:
  v   - local vector receiving the min-abs value of each row
  idx - optional array receiving the global column index of that entry;
        on a tie, the smaller global column index wins
*/
static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* global column of each compressed B column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything is in the diagonal block; compute directly into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* no local columns: every row's minimum magnitude is the implicit 0.0 */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = 0.0;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* general case: compute per-row results for each block, then merge */
  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so it has an implicit 0.0 and the min magnitude is at most 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): relies on cmap[] (garray) being sorted ascending — confirm */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the stored B entries of this row; keep the smaller magnitude */
    for (j = 0; j < ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge the diagonal-block and off-diagonal-block results per row */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      /* tie: prefer the smaller global column index */
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2319 
/*
  MatGetRowMin_MPIAIJ - for each local row, find the minimum entry (by real
  part); implicit zeros of the off-diagonal block count as 0.0.

  Output:
  v   - local vector receiving the minimum of each row
  idx - optional array receiving the global column index of that entry;
        on a tie, the smaller global column index wins
*/
static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* global column of each compressed B column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything is in the diagonal block; compute directly into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* no local columns at all: identity of min is PETSC_MAX_REAL */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* general case: compute per-row results for each block, then merge.
     NOTE(review): uses PetscCalloc2 where the MinAbs/Max variants use
     PetscMalloc2; the zero-fill looks harmless but the inconsistency may be
     unintentional — confirm */
  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so it has an implicit 0.0 and the minimum is 0.0 or lower */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): relies on cmap[] (garray) being sorted ascending — confirm */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the stored B entries of this row; keep the smaller value */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge the diagonal-block and off-diagonal-block results per row */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      /* tie: prefer the smaller global column index */
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2435 
/*
  MatGetRowMax_MPIAIJ - for each local row, find the maximum entry (by real
  part); implicit zeros of the off-diagonal block count as 0.0.

  Output:
  v   - local vector receiving the maximum of each row
  idx - optional array receiving the global column index of that entry;
        on a tie, the smaller global column index wins
*/
static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* global column of each compressed B column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything is in the diagonal block; compute directly into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* no local columns at all: identity of max is PETSC_MIN_REAL */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* general case: compute per-row results for each block, then merge */
  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): relies on cmap[] (garray) being sorted ascending — confirm */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the stored B entries of this row; keep the larger value */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge the diagonal-block and off-diagonal-block results per row */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      /* tie: prefer the smaller global column index */
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2551 
2552 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2553 {
2554   Mat *dummy;
2555 
2556   PetscFunctionBegin;
2557   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2558   *newmat = *dummy;
2559   PetscCall(PetscFree(dummy));
2560   PetscFunctionReturn(PETSC_SUCCESS);
2561 }
2562 
2563 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2564 {
2565   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2566 
2567   PetscFunctionBegin;
2568   PetscCall(MatInvertBlockDiagonal(a->A, values));
2569   A->factorerrortype = a->A->factorerrortype;
2570   PetscFunctionReturn(PETSC_SUCCESS);
2571 }
2572 
2573 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2574 {
2575   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2576 
2577   PetscFunctionBegin;
2578   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2579   PetscCall(MatSetRandom(aij->A, rctx));
2580   if (x->assembled) {
2581     PetscCall(MatSetRandom(aij->B, rctx));
2582   } else {
2583     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2584   }
2585   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2586   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2587   PetscFunctionReturn(PETSC_SUCCESS);
2588 }
2589 
2590 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2591 {
2592   PetscFunctionBegin;
2593   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2594   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2595   PetscFunctionReturn(PETSC_SUCCESS);
2596 }
2597 
2598 /*@
2599   MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2600 
2601   Not Collective
2602 
2603   Input Parameter:
2604 . A - the matrix
2605 
2606   Output Parameter:
2607 . nz - the number of nonzeros
2608 
2609   Level: advanced
2610 
2611 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2612 @*/
2613 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2614 {
2615   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2616   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2617   PetscBool   isaij;
2618 
2619   PetscFunctionBegin;
2620   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
2621   PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
2622   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2623   PetscFunctionReturn(PETSC_SUCCESS);
2624 }
2625 
2626 /*@
  MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2628 
2629   Collective
2630 
2631   Input Parameters:
2632 + A  - the matrix
2633 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)
2634 
2635   Level: advanced
2636 
2637 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2638 @*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation if one is composed on A;
     silently a no-op for matrix types that do not provide it */
  PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2645 
2646 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
2647 {
2648   PetscBool sc = PETSC_FALSE, flg;
2649 
2650   PetscFunctionBegin;
2651   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2652   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2653   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2654   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2655   PetscOptionsHeadEnd();
2656   PetscFunctionReturn(PETSC_SUCCESS);
2657 }
2658 
/* Y += a*I. May first need to (re)preallocate the diagonal block so the
   diagonal entries can be inserted. */
static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    /* never preallocated: reserve one entry per row for the diagonal */
    PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
  } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
    /* preserve aij->nonew across the preallocation call, which modifies it */
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2675 
2676 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2677 {
2678   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2679 
2680   PetscFunctionBegin;
2681   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2682   PetscCall(MatMissingDiagonal(a->A, missing, d));
2683   if (d) {
2684     PetscInt rstart;
2685     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2686     *d += rstart;
2687   }
2688   PetscFunctionReturn(PETSC_SUCCESS);
2689 }
2690 
2691 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2692 {
2693   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2694 
2695   PetscFunctionBegin;
2696   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2697   PetscFunctionReturn(PETSC_SUCCESS);
2698 }
2699 
2700 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
2701 {
2702   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2703 
2704   PetscFunctionBegin;
2705   PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2706   PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2707   PetscFunctionReturn(PETSC_SUCCESS);
2708 }
2709 
/* Function table for MATMPIAIJ. This is a positional initializer of
   struct _MatOps, so the ORDER of entries is significant: each entry fills a
   fixed slot (the numbered comments mark every fifth slot). A NULL entry means
   the operation is not provided by this type here (it may be composed
   elsewhere or unsupported). Keep entries in exact slot order. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIAIJ};
2862 
2863 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2864 {
2865   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2866 
2867   PetscFunctionBegin;
2868   PetscCall(MatStoreValues(aij->A));
2869   PetscCall(MatStoreValues(aij->B));
2870   PetscFunctionReturn(PETSC_SUCCESS);
2871 }
2872 
2873 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2874 {
2875   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2876 
2877   PetscFunctionBegin;
2878   PetscCall(MatRetrieveValues(aij->A));
2879   PetscCall(MatRetrieveValues(aij->B));
2880   PetscFunctionReturn(PETSC_SUCCESS);
2881 }
2882 
/*
  Preallocates the diagonal (A) and off-diagonal (B) sequential blocks of an
  MPIAIJ matrix. Any previously built blocks and communication data are
  destroyed and rebuilt from scratch.

  Input Parameters:
+ B     - the MPIAIJ matrix
. d_nz  - number of nonzeros per row in the diagonal block (same for all rows)
. d_nnz - per-row nonzero counts for the diagonal block, or NULL
. o_nz  - number of nonzeros per row in the off-diagonal block (same for all rows)
- o_nnz - per-row nonzero counts for the off-diagonal block, or NULL
*/
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* If hash-table-based MatSetValues() was active, restore the saved
     operations table before doing a real preallocation */
  if (B->hash_active) {
    B->ops[0]      = b->cops;
    B->hash_active = PETSC_FALSE;
  }
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

  /* Discard column map, global column array, and scatter data that were
     built for a previous nonzero pattern; they are recreated at assembly */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* Rebuild the off-diagonal block; on a single process it has zero columns
     because there are no off-process columns */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));

  /* Rebuild the diagonal block with the local row/column sizes */
  PetscCall(MatDestroy(&b->A));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
  PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
  PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
  PetscCall(MatSetType(b->A, MATSEQAIJ));

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2925 
/*
  Resets an MPIAIJ matrix to its preallocated-but-unassembled state: the
  existing nonzero structure of the local blocks is reset (via the SeqAIJ
  reset) and all off-process communication data is destroyed so it will be
  rebuilt at the next assembly.
*/
static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

  /* Drop the column map, global column indices, and scatter context; they
     correspond to the old assembled pattern */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* Reset the sequential diagonal and off-diagonal blocks in place */
  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2951 
/*
  Duplicates an MPIAIJ matrix, copying the parallel layout, the column map,
  the global column array, the scatter context, and (depending on cpvalues)
  the numerical values of the local blocks.

  Input Parameters:
+ matin    - matrix to duplicate
- cpvalues - MAT_COPY_VALUES, MAT_DO_NOT_COPY_VALUES, or MAT_SHARE_NONZERO_PATTERN

  Output Parameter:
. newmat - the duplicate
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat         mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-call MatGetRow() workspace is not copied; it is allocated lazily */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  /* Layouts are shared by reference, not copied */
  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  /* Deep-copy the global-to-local column map used for off-diagonal inserts */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
  } else a->colmap = NULL;
  /* Deep-copy the global column indices of the off-diagonal block */
  if (oldmat->garray) {
    PetscInt len;
    len = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len + 1, &a->garray));
    if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
  if (oldmat->Mvctx) PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
3007 
3008 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3009 {
3010   PetscBool isbinary, ishdf5;
3011 
3012   PetscFunctionBegin;
3013   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3014   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3015   /* force binary viewer to load .info file if it has not yet done so */
3016   PetscCall(PetscViewerSetUp(viewer));
3017   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3018   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3019   if (isbinary) {
3020     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3021   } else if (ishdf5) {
3022 #if defined(PETSC_HAVE_HDF5)
3023     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3024 #else
3025     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3026 #endif
3027   } else {
3028     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3029   }
3030   PetscFunctionReturn(PETSC_SUCCESS);
3031 }
3032 
/*
  Loads an MPIAIJ matrix from a PETSc binary viewer. The file stores a
  4-entry header (classid, M, N, nz) followed by per-row lengths, column
  indices, and values; each rank reads its own row slice collectively and
  assembles via MatMPIAIJSetPreallocationCSR().
*/
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  /* prefix-sum converts per-row counts into CSR row offsets */
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  /* nz == PETSC_MAX_INT presumably marks an unknown total count, so the
     consistency check is skipped in that case — NOTE(review): confirm */
  if (nz != PETSC_MAX_INT) {
    PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
    PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  }

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3085 
3086 /* Not scalable because of ISAllGather() unless getting all columns. */
/* Not scalable because of ISAllGather() unless getting all columns. */
/*
  Builds a sequential IS containing the (gathered) column indices of iscol.
  If every rank's iscol is exactly its owned column range (detected
  collectively), an identity stride IS of the global column size is returned
  instead, avoiding the ISAllGather().
*/
static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
{
  IS          iscol_local;
  PetscBool   isstride;
  PetscMPIInt lisstride = 0, gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));

  if (isstride) {
    PetscInt start, len, mstart, mlen;
    PetscCall(ISStrideGetInfo(iscol, &start, NULL));
    PetscCall(ISGetLocalSize(iscol, &len));
    PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
    if (mstart == start && mlen - mstart == len) lisstride = 1;
  }

  /* MPI_MIN: all ranks must have the full-range stride for the shortcut */
  PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
  if (gisstride) {
    PetscInt N;
    PetscCall(MatGetSize(mat, NULL, &N));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
    PetscCall(ISSetIdentity(iscol_local));
    PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
  } else {
    PetscInt cbs;
    /* preserve the block size of iscol on the gathered IS */
    PetscCall(ISGetBlockSize(iscol, &cbs));
    PetscCall(ISAllGather(iscol, &iscol_local));
    PetscCall(ISSetBlockSize(iscol_local, cbs));
  }

  *isseq = iscol_local;
  PetscFunctionReturn(PETSC_SUCCESS);
}
3122 
3123 /*
3124  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3125  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3126 
3127  Input Parameters:
3128 +   mat - matrix
+   isrow - parallel row index set; its local indices are a subset of local rows of `mat`,
           i.e., mat->rstart <= isrow[i] < mat->rend
3131 -   iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
3132            i.e., mat->cstart <= iscol[i] < mat->cend
3133 
3134  Output Parameters:
3135 +   isrow_d - sequential row index set for retrieving mat->A
3136 .   iscol_d - sequential  column index set for retrieving mat->A
3137 .   iscol_o - sequential column index set for retrieving mat->B
3138 -   garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
3139  */
static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
{
  Vec             x, cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray, *cmaparray;
  PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
  Mat             B    = a->B;
  Vec             lvec = a->lvec, lcmap;
  PetscInt        i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm        comm;
  VecScatter      Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0));
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices: exclusive prefix sum of ncols over ranks gives this
     rank's starting position in the global submatrix column numbering */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  /* mark selected columns: x holds the global column index, cmap holds the
     column's index in the submatrix */
  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
    idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d: local (diagonal-block) column indices; ownership of idx
     transfers to the IS */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i));
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d: rows of isrow shifted to local numbering */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices: entries of lvec still at -1 were not selected */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  for (i = 0; i < Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  /* caller takes ownership of cmap1 (returned as *garray) and must free it */
  *garray = cmap1;

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3236 
3237 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
/*
  Extracts a parallel submatrix by taking sequential submatrices of the local
  diagonal (a->A) and off-diagonal (a->B) blocks. On MAT_INITIAL_MATRIX the
  index sets isrow_d/iscol_d/iscol_o are built and stashed on the result via
  PetscObjectCompose() so MAT_REUSE_MATRIX can retrieve them later.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat         M = NULL;
  MPI_Comm    comm;
  IS          iscol_d, isrow_d, iscol_o;
  Mat         Asub = NULL, Bsub = NULL;
  PetscInt    n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    /* only update the off-diagonal block when it has columns */
    if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));

    /* Create submatrix M; Asub becomes part of M, Bsub is destroyed inside */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ *)M->data;

    PetscCall(ISGetLocalSize(iscol_o, &BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt        i, j, *idx_new, *subgarray = asub->garray;
      PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));

      /* walk both sorted garrays, keeping only iscol_o entries whose global
         column survived the condensation */
      PetscCall(PetscMalloc1(n, &idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o, &idx));
      for (i = 0; i < n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o, &idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3329 
/*
  Top-level submatrix extraction for MPIAIJ. Collectively decides which
  specialized path to take: SameRowColDist (both index sets match mat's
  layout), SameRowDist (only rows match, iscol_local sorted), or the general
  non-scalable path via ISAllGather(). All ranks must reach the same
  decision, hence the MPI_LAND reduction.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* the composed objects record which path built the matrix originally */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      /* min/max inside the local ownership range implies all entries are local */
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol, &i, &j));
      PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* collective AND: every rank must satisfy the locality test */
    PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
    PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
        PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol, &i));
        PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);

        PetscCall(ISSorted(iscol_local, &sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
        /* unsorted: fall through to the general path below; iscol_local is reused there */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
  }

  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* stash the gathered IS on the result so MAT_REUSE_MATRIX can find it */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3429 
3430 /*@C
3431   MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3432   and "off-diagonal" part of the matrix in CSR format.
3433 
3434   Collective
3435 
3436   Input Parameters:
3437 + comm   - MPI communicator
3438 . A      - "diagonal" portion of matrix
3439 . B      - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3440 - garray - global index of `B` columns
3441 
3442   Output Parameter:
3443 . mat - the matrix, with input `A` as its local diagonal matrix
3444 
3445   Level: advanced
3446 
3447   Notes:
3448   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3449 
3450   `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore.
3451 
3452 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3453 @*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  /* A and B must have the same number of (local) rows */
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat: sum of the local diagonal-block widths */
  PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat; ownership of A transfers to *mat */
  maij->A = A;

  /* translate B's local column indices to global ones, in place, via garray */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew aliases B's i/j/a arrays */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* transfer ownership of the shared arrays from B to Bnew so destroying B
     does not free them */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3524 
3525 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3526 
/*
   MatCreateSubMatrix_MPIAIJ_SameRowDist - extracts the parallel submatrix mat[isrow,iscol],
   keeping the same row distribution as mat (each process extracts only from rows it owns).

   Input:
     mat         - the MPIAIJ matrix
     isrow       - rows requested by this process
     iscol       - the (parallel) column index set
     iscol_local - sequential IS holding all requested columns; must be sorted, may contain
                   duplicates (see step (2) below). Consumed here on MAT_INITIAL_MATRIX:
                   it is composed with *newmat as "ISAllGather" and its reference released.
     call        - MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
   Output:
     newmat      - the submatrix; on MAT_REUSE_MATRIX it must carry the "SubMatrix",
                   "SubIScol" and "Subcmap" objects composed by a previous
                   MAT_INITIAL_MATRIX call with the same index sets.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
{
  PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
  PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             M, Msub, B = a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray, *colsub, Ncols;
  PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  IS              iscol_sub, iscmap;
  const PetscInt *is_idx, *cmap;
  PetscBool       allcolumns = PETSC_FALSE;
  MPI_Comm        comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  if (call == MAT_REUSE_MATRIX) {
    /* Recover the column IS, column map and sequential submatrix saved on *newmat by a
       previous MAT_INITIAL_MATRIX call, then refill Msub with the current values of mat */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
    PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub, &count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
    PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
    PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");

    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol, &n));
    PetscCall(ISGetSize(iscol, &Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    /* allcolumns must agree on all ranks, hence the allreduce with logical AND */
    PetscCall(ISIdentity(iscol_local, &flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      /* trivial column map: submatrix columns coincide with requested columns */
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      /* Keep only the requested columns this process actually touches: those in its
         diagonal block [cstart,cend) plus those present in garray (off-diagonal block).
         idx[]   = kept global column indices (-> iscol_sub)
         cmap1[] = position of each kept column within the submatrix (-> iscmap) */
      PetscInt *idx, *cmap1, k;
      PetscCall(PetscMalloc1(Ncols, &idx));
      PetscCall(PetscMalloc1(Ncols, &cmap1));
      PetscCall(ISGetIndices(iscol_local, &is_idx));
      count = 0;
      k     = 0;
      for (i = 0; i < Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat */
          /* garray is sorted, so advance k monotonically as is_idx increases */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn - 1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local, &is_idx));

      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
      PetscCall(ISGetBlockSize(iscol, &cbs));
      PetscCall(ISSetBlockSize(iscol_sub, cbs));

      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub, &count));
  aij = (Mat_SeqAIJ *)(Msub)->data;
  ii  = aij->i;
  PetscCall(ISGetIndices(iscmap, &cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub, &m, NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank, size;
    PetscInt    csize;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol, &csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        /* spread the columns as evenly as possible across ranks */
        nlocal = Ncols / size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum of local column counts gives this rank's diagonal column range [rstart,rend) */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);

    /* next, compute all the lengths */
    /* cmap[] translates Msub's local column numbers into new global column numbers */
    jj = aij->j;
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow, &bs));
    PetscCall(ISGetBlockSize(iscol, &cbs));

    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens)); /* frees olens too (same allocation) */

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M, &i, NULL));
    PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(count, &colsub));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));

  /* walk Msub row by row, translating its local column indices through cmap[] */
  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
    jj += nz;
    aa += nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
  PetscCall(ISRestoreIndices(iscmap, &cmap));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    /* composing transfers ownership to *newmat; local references are dropped */
    *newmat = M;
    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3736 
/*
    Not great since it makes two copies of the submatrix, first a SeqAIJ
  locally and then the end result by concatenating the local matrices.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().

  This requires a sequential iscol with all indices.
*/
/*
   MatCreateSubMatrix_MPIAIJ_nonscalable - extracts mat[isrow,iscol] by first building a
   sequential submatrix on each process, then assembling the parallel result from it.

   Input:
     mat   - the MPIAIJ matrix
     isrow - rows requested by this process
     iscol - sequential IS with ALL requested column indices (hence "nonscalable")
     csize - number of local columns for the result, or PETSC_DECIDE
     call  - MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
   Output:
     newmat - the submatrix; on MAT_REUSE_MATRIX it must carry the "SubMatrix" object
              composed by a previous MAT_INITIAL_MATRIX call with the same index sets.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns */
  /* allcolumns must agree on all ranks, hence the allreduce with logical AND */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    /* refill the sequential submatrix saved on *newmat by a previous initial call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* spread the columns as evenly as possible across ranks */
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum of local column counts gives this rank's diagonal column range [rstart,rend) */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens)); /* frees olens too (same allocation) */
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    /* insert Mreuse's row i as global row rstart+i; cwork/vwork point into Mreuse's CSR data */
    row   = rstart + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj += nz;
    vwork = aa;
    aa += nz;
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    /* composing transfers ownership to M; the local reference is dropped */
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3870 
3871 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3872 {
3873   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3874   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
3875   const PetscInt *JJ;
3876   PetscBool       nooffprocentries;
3877   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3878 
3879   PetscFunctionBegin;
3880   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);
3881 
3882   PetscCall(PetscLayoutSetUp(B->rmap));
3883   PetscCall(PetscLayoutSetUp(B->cmap));
3884   m      = B->rmap->n;
3885   cstart = B->cmap->rstart;
3886   cend   = B->cmap->rend;
3887   rstart = B->rmap->rstart;
3888 
3889   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3890 
3891   if (PetscDefined(USE_DEBUG)) {
3892     for (i = 0; i < m; i++) {
3893       nnz = Ii[i + 1] - Ii[i];
3894       JJ  = J ? J + Ii[i] : NULL;
3895       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3896       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3897       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3898     }
3899   }
3900 
3901   for (i = 0; i < m; i++) {
3902     nnz     = Ii[i + 1] - Ii[i];
3903     JJ      = J ? J + Ii[i] : NULL;
3904     nnz_max = PetscMax(nnz_max, nnz);
3905     d       = 0;
3906     for (j = 0; j < nnz; j++) {
3907       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3908     }
3909     d_nnz[i] = d;
3910     o_nnz[i] = nnz - d;
3911   }
3912   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3913   PetscCall(PetscFree2(d_nnz, o_nnz));
3914 
3915   for (i = 0; i < m; i++) {
3916     ii = i + rstart;
3917     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J ? J + Ii[i] : NULL, v ? v + Ii[i] : NULL, INSERT_VALUES));
3918   }
3919   nooffprocentries    = B->nooffprocentries;
3920   B->nooffprocentries = PETSC_TRUE;
3921   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3922   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3923   B->nooffprocentries = nooffprocentries;
3924 
3925   /* count number of entries below block diagonal */
3926   PetscCall(PetscFree(Aij->ld));
3927   PetscCall(PetscCalloc1(m, &ld));
3928   Aij->ld = ld;
3929   for (i = 0; i < m; i++) {
3930     nnz = Ii[i + 1] - Ii[i];
3931     j   = 0;
3932     while (j < nnz && J[j] < cstart) j++;
3933     ld[i] = j;
3934     if (J) J += nnz;
3935   }
3936 
3937   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3938   PetscFunctionReturn(PETSC_SUCCESS);
3939 }
3940 
3941 /*@
3942   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3943   (the default parallel PETSc format).
3944 
3945   Collective
3946 
3947   Input Parameters:
3948 + B - the matrix
3949 . i - the indices into j for the start of each local row (starts with zero)
3950 . j - the column indices for each local row (starts with zero)
3951 - v - optional values in the matrix
3952 
3953   Level: developer
3954 
3955   Notes:
3956   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
3957   thus you CANNOT change the matrix entries by changing the values of `v` after you have
3958   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3959 
3960   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
3961 
  The format which is used for the sparse matrix input is equivalent to a
  row-major ordering, i.e. for the following matrix, the input data expected is
  as shown
3965 
3966 .vb
3967         1 0 0
3968         2 0 3     P0
3969        -------
3970         4 5 6     P1
3971 
3972      Process0 [P0] rows_owned=[0,1]
3973         i =  {0,1,3}  [size = nrow+1  = 2+1]
3974         j =  {0,0,2}  [size = 3]
3975         v =  {1,2,3}  [size = 3]
3976 
3977      Process1 [P1] rows_owned=[2]
3978         i =  {0,3}    [size = nrow+1  = 1+1]
3979         j =  {0,1,2}  [size = 3]
3980         v =  {4,5,6}  [size = 3]
3981 .ve
3982 
3983 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
3984           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
3985 @*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* forward to the type-specific implementation composed on B as
     "MatMPIAIJSetPreallocationCSR_C" (e.g. MatMPIAIJSetPreallocationCSR_MPIAIJ) */
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3992 
3993 /*@C
3994   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
3995   (the default parallel PETSc format).  For good matrix assembly performance
3996   the user should preallocate the matrix storage by setting the parameters
3997   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
3998 
3999   Collective
4000 
4001   Input Parameters:
4002 + B     - the matrix
4003 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4004            (same value is used for all local rows)
4005 . d_nnz - array containing the number of nonzeros in the various rows of the
4006            DIAGONAL portion of the local submatrix (possibly different for each row)
4007            or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4008            The size of this array is equal to the number of local rows, i.e 'm'.
4009            For matrices that will be factored, you must leave room for (and set)
4010            the diagonal entry even if it is zero.
4011 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4012            submatrix (same value is used for all local rows).
4013 - o_nnz - array containing the number of nonzeros in the various rows of the
4014            OFF-DIAGONAL portion of the local submatrix (possibly different for
4015            each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4016            structure. The size of this array is equal to the number
4017            of local rows, i.e 'm'.
4018 
4019   Example Usage:
4020   Consider the following 8x8 matrix with 34 non-zero values, that is
4021   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4022   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4023   as follows
4024 
4025 .vb
4026             1  2  0  |  0  3  0  |  0  4
4027     Proc0   0  5  6  |  7  0  0  |  8  0
4028             9  0 10  | 11  0  0  | 12  0
4029     -------------------------------------
4030            13  0 14  | 15 16 17  |  0  0
4031     Proc1   0 18  0  | 19 20 21  |  0  0
4032             0  0  0  | 22 23  0  | 24  0
4033     -------------------------------------
4034     Proc2  25 26 27  |  0  0 28  | 29  0
4035            30  0  0  | 31 32 33  |  0 34
4036 .ve
4037 
4038   This can be represented as a collection of submatrices as
4039 .vb
4040       A B C
4041       D E F
4042       G H I
4043 .ve
4044 
4045   Where the submatrices A,B,C are owned by proc0, D,E,F are
4046   owned by proc1, G,H,I are owned by proc2.
4047 
4048   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4049   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4050   The 'M','N' parameters are 8,8, and have the same values on all procs.
4051 
4052   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4053   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4054   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4055   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4056   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.
4058 
4059   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4060   allocated for every row of the local diagonal submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
  rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4064   In this case, the values of `d_nz`, `o_nz` are
4065 .vb
4066      proc0  dnz = 2, o_nz = 2
4067      proc1  dnz = 3, o_nz = 2
4068      proc2  dnz = 1, o_nz = 4
4069 .ve
  We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2, i.e. we are using 12+15+10=37 storage locations to store
  34 values.
4074 
4075   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4076   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4077   In the above case the values for `d_nnz`, `o_nnz` are
4078 .vb
4079      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4080      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4081      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4082 .ve
4083   Here the space allocated is sum of all the above values i.e 34, and
4084   hence pre-allocation is perfect.
4085 
4086   Level: intermediate
4087 
4088   Notes:
4089   If the *_nnz parameter is given then the *_nz parameter is ignored
4090 
4091   The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4092   storage.  The stored row and column indices begin with zero.
4093   See [Sparse Matrices](sec_matsparse) for details.
4094 
4095   The parallel matrix is partitioned such that the first m0 rows belong to
4096   process 0, the next m1 rows belong to process 1, the next m2 rows belong
4097   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4098 
4099   The DIAGONAL portion of the local submatrix of a processor can be defined
4100   as the submatrix which is obtained by extraction the part corresponding to
4101   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4102   first row that belongs to the processor, r2 is the last row belonging to
4103   the this processor, and c1-c2 is range of indices of the local part of a
4104   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4105   common case of a square matrix, the row and column ranges are the same and
4106   the DIAGONAL part is also square. The remaining portion of the local
4107   submatrix (mxN) constitute the OFF-DIAGONAL portion.
4108 
4109   If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4110 
4111   You can call `MatGetInfo()` to get information on how effective the preallocation was;
4112   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4113   You can also run with the option `-info` and look for messages with the string
4114   malloc in them to see if additional memory allocation was needed.
4115 
4116 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4117           `MatGetInfo()`, `PetscSplitOwnership()`
4118 @*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* forward to the type-specific implementation composed on B as "MatMPIAIJSetPreallocation_C" */
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4127 
4128 /*@
4129   MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard
4130   CSR format for the local rows.
4131 
4132   Collective
4133 
4134   Input Parameters:
4135 + comm - MPI communicator
4136 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
4137 . n    - This value should be the same as the local size used in creating the
4138        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4139        calculated if N is given) For square matrices n is almost always m.
4140 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4141 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4142 . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4143 . j    - column indices
4144 - a    - optional matrix values
4145 
4146   Output Parameter:
4147 . mat - the matrix
4148 
4149   Level: intermediate
4150 
4151   Notes:
4152   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4153   thus you CANNOT change the matrix entries by changing the values of a[] after you have
4154   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4155 
4156   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
4157 
  The format which is used for the sparse matrix input is equivalent to a
  row-major ordering, i.e. for the following matrix, the input data expected is
  as shown
4161 
4162   Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
4163 .vb
4164         1 0 0
4165         2 0 3     P0
4166        -------
4167         4 5 6     P1
4168 
4169      Process0 [P0] rows_owned=[0,1]
4170         i =  {0,1,3}  [size = nrow+1  = 2+1]
4171         j =  {0,0,2}  [size = 3]
4172         v =  {1,2,3}  [size = 3]
4173 
4174      Process1 [P1] rows_owned=[2]
4175         i =  {0,3}    [size = nrow+1  = 1+1]
4176         j =  {0,1,2}  [size = 3]
4177         v =  {4,5,6}  [size = 3]
4178 .ve
4179 
.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4182 @*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  /* i may be NULL; when given, the local CSR row offsets must start at 0 */
  PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  /* copies the CSR arrays into the matrix, preallocates, and assembles */
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4195 
4196 /*@
4197   MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard
4198   CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed
4199   from `MatCreateMPIAIJWithArrays()`
4200 
4201   Deprecated: Use `MatUpdateMPIAIJWithArray()`
4202 
4203   Collective
4204 
4205   Input Parameters:
4206 + mat - the matrix
4207 . m   - number of local rows (Cannot be `PETSC_DECIDE`)
4208 . n   - This value should be the same as the local size used in creating the
4209        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4210        calculated if N is given) For square matrices n is almost always m.
4211 . M   - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4212 . N   - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4213 . Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4214 . J   - column indices
4215 - v   - matrix values
4216 
4217   Level: deprecated
4218 
4219 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4220           `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`
4221 @*/
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        nnz, i;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
  PetscScalar    *ad, *ao;
  PetscInt        ldi, Iii, md;
  const PetscInt *Adi = Ad->i;        /* row offsets of the diagonal block */
  PetscInt       *ld  = Aij->ld;      /* per-row counts of entries below the block diagonal,
                                         recorded by MatMPIAIJSetPreallocationCSR_MPIAIJ() */

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
  PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));

  /* Copy new values only; the sparsity pattern (J is unused here) must be identical to the
     one given at creation. Each row of v is laid out as
       [ ld[i] off-diag entries left of the diag block | md diag entries | remaining off-diag ]
     so it is split between the diagonal (ad) and off-diagonal (ao) storage. */
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];
    Iii = Ii[i];
    ldi = ld[i];
    md  = Adi[i + 1] - Adi[i];
    PetscCall(PetscArraycpy(ao, v + Iii, ldi));
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
    ad += md;
    ao += nnz - md;
  }
  /* values were written directly into local storage, so skip the off-process stash */
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  /* bump object states so cached data (norms, duplicates, GPU copies) is invalidated */
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(PETSC_SUCCESS);
}
4265 
4266 /*@
4267   MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values
4268 
4269   Collective
4270 
4271   Input Parameters:
4272 + mat - the matrix
4273 - v   - matrix values, stored by row
4274 
4275   Level: intermediate
4276 
4277   Note:
4278   The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4279 
4280 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4281           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4282 @*/
4283 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4284 {
4285   PetscInt        nnz, i, m;
4286   PetscBool       nooffprocentries;
4287   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4288   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4289   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4290   PetscScalar    *ad, *ao;
4291   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4292   PetscInt        ldi, Iii, md;
4293   PetscInt       *ld = Aij->ld;
4294 
4295   PetscFunctionBegin;
4296   m = mat->rmap->n;
4297 
4298   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4299   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4300   Iii = 0;
4301   for (i = 0; i < m; i++) {
4302     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4303     ldi = ld[i];
4304     md  = Adi[i + 1] - Adi[i];
4305     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4306     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4307     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4308     ad += md;
4309     ao += nnz - md;
4310     Iii += nnz;
4311   }
4312   nooffprocentries      = mat->nooffprocentries;
4313   mat->nooffprocentries = PETSC_TRUE;
4314   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4315   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4316   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4317   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4318   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4319   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4320   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4321   mat->nooffprocentries = nooffprocentries;
4322   PetscFunctionReturn(PETSC_SUCCESS);
4323 }
4324 
4325 /*@C
4326   MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4327   (the default parallel PETSc format).  For good matrix assembly performance
4328   the user should preallocate the matrix storage by setting the parameters
4329   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4330 
4331   Collective
4332 
4333   Input Parameters:
4334 + comm  - MPI communicator
4335 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4336            This value should be the same as the local size used in creating the
4337            y vector for the matrix-vector product y = Ax.
4338 . n     - This value should be the same as the local size used in creating the
4339        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4340        calculated if N is given) For square matrices n is almost always m.
4341 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4342 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4343 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4344            (same value is used for all local rows)
4345 . d_nnz - array containing the number of nonzeros in the various rows of the
4346            DIAGONAL portion of the local submatrix (possibly different for each row)
4347            or `NULL`, if `d_nz` is used to specify the nonzero structure.
4348            The size of this array is equal to the number of local rows, i.e 'm'.
4349 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4350            submatrix (same value is used for all local rows).
4351 - o_nnz - array containing the number of nonzeros in the various rows of the
4352            OFF-DIAGONAL portion of the local submatrix (possibly different for
4353            each row) or `NULL`, if `o_nz` is used to specify the nonzero
4354            structure. The size of this array is equal to the number
4355            of local rows, i.e 'm'.
4356 
4357   Output Parameter:
4358 . A - the matrix
4359 
4360   Options Database Keys:
4361 + -mat_no_inode                     - Do not use inodes
4362 . -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
4363 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4364         See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
4365         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4366 
4367   Level: intermediate
4368 
4369   Notes:
4370   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4371   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4372   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4373 
4374   If the *_nnz parameter is given then the *_nz parameter is ignored
4375 
4376   The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4377   processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4378   storage requirements for this matrix.
4379 
4380   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one
4381   processor than it must be used on all processors that share the object for
4382   that argument.
4383 
4384   The user MUST specify either the local or global matrix dimensions
4385   (possibly both).
4386 
4387   The parallel matrix is partitioned across processors such that the
4388   first m0 rows belong to process 0, the next m1 rows belong to
4389   process 1, the next m2 rows belong to process 2 etc.. where
4390   m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4391   values corresponding to [m x N] submatrix.
4392 
4393   The columns are logically partitioned with the n0 columns belonging
4394   to 0th partition, the next n1 columns belonging to the next
4395   partition etc.. where n0,n1,n2... are the input parameter 'n'.
4396 
4397   The DIAGONAL portion of the local submatrix on any given processor
4398   is the submatrix corresponding to the rows and columns m,n
4399   corresponding to the given processor. i.e diagonal matrix on
4400   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4401   etc. The remaining portion of the local submatrix [m x (N-n)]
4402   constitute the OFF-DIAGONAL portion. The example below better
4403   illustrates this concept.
4404 
4405   For a square global matrix we define each processor's diagonal portion
4406   to be its local rows and the corresponding columns (a square submatrix);
4407   each processor's off-diagonal portion encompasses the remainder of the
4408   local matrix (a rectangular submatrix).
4409 
4410   If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
4411 
4412   When calling this routine with a single process communicator, a matrix of
4413   type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4414   type of communicator, use the construction mechanism
4415 .vb
4416   MatCreate(..., &A);
4417   MatSetType(A, MATMPIAIJ);
4418   MatSetSizes(A, m, n, M, N);
4419   MatMPIAIJSetPreallocation(A, ...);
4420 .ve
4421 
4422   By default, this format uses inodes (identical nodes) when possible.
4423   We search for consecutive rows with the same nonzero structure, thereby
4424   reusing matrix information to achieve increased efficiency.
4425 
4426   Example Usage:
4427   Consider the following 8x8 matrix with 34 non-zero values, that is
4428   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4429   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4430   as follows
4431 
4432 .vb
4433             1  2  0  |  0  3  0  |  0  4
4434     Proc0   0  5  6  |  7  0  0  |  8  0
4435             9  0 10  | 11  0  0  | 12  0
4436     -------------------------------------
4437            13  0 14  | 15 16 17  |  0  0
4438     Proc1   0 18  0  | 19 20 21  |  0  0
4439             0  0  0  | 22 23  0  | 24  0
4440     -------------------------------------
4441     Proc2  25 26 27  |  0  0 28  | 29  0
4442            30  0  0  | 31 32 33  |  0 34
4443 .ve
4444 
4445   This can be represented as a collection of submatrices as
4446 
4447 .vb
4448       A B C
4449       D E F
4450       G H I
4451 .ve
4452 
4453   Where the submatrices A,B,C are owned by proc0, D,E,F are
4454   owned by proc1, G,H,I are owned by proc2.
4455 
4456   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4457   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4458   The 'M','N' parameters are 8,8, and have the same values on all procs.
4459 
4460   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4461   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4462   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4463   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4464   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another SeqAIJ matrix.
4466 
4467   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4468   allocated for every row of the local diagonal submatrix, and `o_nz`
4469   storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
4471   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4472   In this case, the values of `d_nz`,`o_nz` are
4473 .vb
4474      proc0  dnz = 2, o_nz = 2
4475      proc1  dnz = 3, o_nz = 2
4476      proc2  dnz = 1, o_nz = 4
4477 .ve
4478   We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
4479   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2. i.e. we are using 12+15+10=37 storage locations to store
4481   34 values.
4482 
4483   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4484   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4485   In the above case the values for d_nnz,o_nnz are
4486 .vb
4487      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4488      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4489      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4490 .ve
4491   Here the space allocated is sum of all the above values i.e 34, and
4492   hence pre-allocation is perfect.
4493 
4494 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4495           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
4496 @*/
4497 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4498 {
4499   PetscMPIInt size;
4500 
4501   PetscFunctionBegin;
4502   PetscCall(MatCreate(comm, A));
4503   PetscCall(MatSetSizes(*A, m, n, M, N));
4504   PetscCallMPI(MPI_Comm_size(comm, &size));
4505   if (size > 1) {
4506     PetscCall(MatSetType(*A, MATMPIAIJ));
4507     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4508   } else {
4509     PetscCall(MatSetType(*A, MATSEQAIJ));
4510     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4511   }
4512   PetscFunctionReturn(PETSC_SUCCESS);
4513 }
4514 
4515 /*MC
4516     MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix
4517 
4518     Synopsis:
4519     MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4520 
4521     Not Collective
4522 
4523     Input Parameter:
4524 .   A - the `MATMPIAIJ` matrix
4525 
4526     Output Parameters:
4527 +   Ad - the diagonal portion of the matrix
4528 .   Ao - the off-diagonal portion of the matrix
4529 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4530 -   ierr - error code
4531 
4532      Level: advanced
4533 
4534     Note:
4535     Use  `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4536 
4537 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
4538 M*/
4539 
4540 /*MC
4541     MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4542 
4543     Synopsis:
4544     MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4545 
4546     Not Collective
4547 
4548     Input Parameters:
4549 +   A - the `MATMPIAIJ` matrix
4550 .   Ad - the diagonal portion of the matrix
4551 .   Ao - the off-diagonal portion of the matrix
4552 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4553 -   ierr - error code
4554 
4555      Level: advanced
4556 
4557 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
4558 M*/
4559 
4560 /*@C
4561   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4562 
4563   Not Collective
4564 
4565   Input Parameter:
4566 . A - The `MATMPIAIJ` matrix
4567 
4568   Output Parameters:
4569 + Ad     - The local diagonal block as a `MATSEQAIJ` matrix
4570 . Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
4571 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4572 
4573   Level: intermediate
4574 
4575   Note:
4576   The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns in `Ao` are in [0, Nco), where Nco is
4578   the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
4579   local column numbers to global column numbers in the original matrix.
4580 
4581   Fortran Notes:
4582   `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()`
4583 
4584 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
4585 @*/
4586 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4587 {
4588   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4589   PetscBool   flg;
4590 
4591   PetscFunctionBegin;
4592   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4593   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4594   if (Ad) *Ad = a->A;
4595   if (Ao) *Ao = a->B;
4596   if (colmap) *colmap = a->garray;
4597   PetscFunctionReturn(PETSC_SUCCESS);
4598 }
4599 
/*
  Concatenates the rows of the per-process sequential matrices `inmat` into a
  single parallel matrix `*outmat` on `comm`: each process contributes its
  `inmat` rows, stacked in rank order.

  Input: n is the local column size of the result (or PETSC_DECIDE); scall is
  MAT_INITIAL_MATRIX (create + preallocate, then fill) or MAT_REUSE_MATRIX
  (fill an existing *outmat with the same pattern).
*/
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  PetscInt     m, N, i, rstart, nnz, Ii;
  PetscInt    *indx;
  PetscScalar *values;
  MatType      rootType;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz, *onz, sum, bs, cbs;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    /* prefix sum of local row counts gives this process's first global row */
    PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
    rstart -= m;

    /* count diagonal/off-diagonal nonzeros per row for preallocation */
    MatPreallocateBegin(comm, m, n, dnz, onz);
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
      PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
    }

    PetscCall(MatCreate(comm, outmat));
    PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
    PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
    /* use the root type so device subclasses of inmat are preserved */
    PetscCall(MatGetRootType_Private(inmat, &rootType));
    PetscCall(MatSetType(*outmat, rootType));
    /* only one of these preallocations takes effect, depending on the actual type */
    PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
    MatPreallocateEnd(dnz, onz);
    /* all values below are set into locally owned rows */
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase */
  PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
    Ii = i + rstart;
    PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
  }
  PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4651 
/*
  Container destructor for the Mat_Merge_SeqsToMPI support structure attached
  to matrices built by MatCreateMPIAIJSumSeqAIJSymbolic(); frees all merge
  bookkeeping. NOTE(review): order matters for the recv buffers — the payload
  buf_ri[0]/buf_rj[0] is read through the pointer arrays, so it must be freed
  before buf_ri/buf_rj themselves.
*/
static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
{
  Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;

  PetscFunctionBegin;
  if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
  /* communication bookkeeping (sender ids, send/recv lengths) */
  PetscCall(PetscFree(merge->id_r));
  PetscCall(PetscFree(merge->len_s));
  PetscCall(PetscFree(merge->len_r));
  /* merged row structure of the parallel matrix */
  PetscCall(PetscFree(merge->bi));
  PetscCall(PetscFree(merge->bj));
  /* received i/j buffers: payload first, then the pointer arrays */
  PetscCall(PetscFree(merge->buf_ri[0]));
  PetscCall(PetscFree(merge->buf_ri));
  PetscCall(PetscFree(merge->buf_rj[0]));
  PetscCall(PetscFree(merge->buf_rj));
  /* optional product-support arrays (PetscFree is a no-op on NULL) */
  PetscCall(PetscFree(merge->coi));
  PetscCall(PetscFree(merge->coj));
  PetscCall(PetscFree(merge->owners_co));
  PetscCall(PetscLayoutDestroy(&merge->rowmap));
  PetscCall(PetscFree(merge));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4674 
4675 #include <../src/mat/utils/freespace.h>
4676 #include <petscbt.h>
4677 
/*
  Numeric phase of summing per-process sequential AIJ matrices into a parallel
  MPIAIJ matrix whose structure was built by MatCreateMPIAIJSumSeqAIJSymbolic().
  Each process sends the values of the seqmat rows owned by other ranks, then
  accumulates its local values plus all received contributions into mpimat,
  using the merged row structure (bi/bj) stashed on mpimat in a container.
*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
{
  MPI_Comm             comm;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, taga, *len_s;
  PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj;
  PetscInt             proc, m;
  PetscInt           **buf_ri, **buf_rj;
  PetscInt             k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
  PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
  MPI_Request         *s_waits, *r_waits;
  MPI_Status          *status;
  const MatScalar     *aa, *a_a;
  MatScalar          **abuf_r, *ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));

  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* retrieve the merge bookkeeping attached by the symbolic phase */
  PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container, (void **)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
  aa = a_a;

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  PetscCall(PetscMalloc1(size, &status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
  PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));

  PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    /* send the values of all seqmat rows owned by [proc] as one contiguous message */
    i = owners[proc];
    PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  PetscCall(PetscMalloc1(N, &ba_i)); /* accumulator for one merged row (at most N entries) */
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i = 0; i < m; i++) {
    arow = owners[rank] + i; /* global row index */
    bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
    bnzi = bi[i + 1] - bi[i];
    PetscCall(PetscArrayzero(ba_i, bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow + 1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* both aj and bj_i are sorted, and aj is a subset of bj_i: walk bj_i once */
    for (j = 0; nextaj < anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k] + 1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j = 0; nextaj < anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++;
        nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
  PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));

  /* abuf_r[0] holds the contiguous payload for all received messages */
  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4796 
4797 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4798 {
4799   Mat                  B_mpi;
4800   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4801   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4802   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4803   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4804   PetscInt             len, proc, *dnz, *onz, bs, cbs;
4805   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4806   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4807   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4808   MPI_Status          *status;
4809   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4810   PetscBT              lnkbt;
4811   Mat_Merge_SeqsToMPI *merge;
4812   PetscContainer       container;
4813 
4814   PetscFunctionBegin;
4815   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4816 
4817   /* make sure it is a PETSc comm */
4818   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4819   PetscCallMPI(MPI_Comm_size(comm, &size));
4820   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4821 
4822   PetscCall(PetscNew(&merge));
4823   PetscCall(PetscMalloc1(size, &status));
4824 
4825   /* determine row ownership */
4826   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4827   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4828   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4829   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4830   PetscCall(PetscLayoutSetUp(merge->rowmap));
4831   PetscCall(PetscMalloc1(size, &len_si));
4832   PetscCall(PetscMalloc1(size, &merge->len_s));
4833 
4834   m      = merge->rowmap->n;
4835   owners = merge->rowmap->range;
4836 
4837   /* determine the number of messages to send, their lengths */
4838   len_s = merge->len_s;
4839 
4840   len          = 0; /* length of buf_si[] */
4841   merge->nsend = 0;
4842   for (proc = 0; proc < size; proc++) {
4843     len_si[proc] = 0;
4844     if (proc == rank) {
4845       len_s[proc] = 0;
4846     } else {
4847       len_si[proc] = owners[proc + 1] - owners[proc] + 1;
4848       len_s[proc]  = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4849     }
4850     if (len_s[proc]) {
4851       merge->nsend++;
4852       nrows = 0;
4853       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4854         if (ai[i + 1] > ai[i]) nrows++;
4855       }
4856       len_si[proc] = 2 * (nrows + 1);
4857       len += len_si[proc];
4858     }
4859   }
4860 
4861   /* determine the number and length of messages to receive for ij-structure */
4862   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4863   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4864 
4865   /* post the Irecv of j-structure */
4866   PetscCall(PetscCommGetNewTag(comm, &tagj));
4867   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4868 
4869   /* post the Isend of j-structure */
4870   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4871 
4872   for (proc = 0, k = 0; proc < size; proc++) {
4873     if (!len_s[proc]) continue;
4874     i = owners[proc];
4875     PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4876     k++;
4877   }
4878 
4879   /* receives and sends of j-structure are complete */
4880   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4881   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4882 
4883   /* send and recv i-structure */
4884   PetscCall(PetscCommGetNewTag(comm, &tagi));
4885   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4886 
4887   PetscCall(PetscMalloc1(len + 1, &buf_s));
4888   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4889   for (proc = 0, k = 0; proc < size; proc++) {
4890     if (!len_s[proc]) continue;
4891     /* form outgoing message for i-structure:
4892          buf_si[0]:                 nrows to be sent
4893                [1:nrows]:           row index (global)
4894                [nrows+1:2*nrows+1]: i-structure index
4895     */
4896     nrows       = len_si[proc] / 2 - 1;
4897     buf_si_i    = buf_si + nrows + 1;
4898     buf_si[0]   = nrows;
4899     buf_si_i[0] = 0;
4900     nrows       = 0;
4901     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4902       anzi = ai[i + 1] - ai[i];
4903       if (anzi) {
4904         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4905         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4906         nrows++;
4907       }
4908     }
4909     PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4910     k++;
4911     buf_si += len_si[proc];
4912   }
4913 
4914   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4915   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4916 
4917   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4918   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4919 
4920   PetscCall(PetscFree(len_si));
4921   PetscCall(PetscFree(len_ri));
4922   PetscCall(PetscFree(rj_waits));
4923   PetscCall(PetscFree2(si_waits, sj_waits));
4924   PetscCall(PetscFree(ri_waits));
4925   PetscCall(PetscFree(buf_s));
4926   PetscCall(PetscFree(status));
4927 
4928   /* compute a local seq matrix in each processor */
4929   /* allocate bi array and free space for accumulating nonzero column info */
4930   PetscCall(PetscMalloc1(m + 1, &bi));
4931   bi[0] = 0;
4932 
4933   /* create and initialize a linked list */
4934   nlnk = N + 1;
4935   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
4936 
4937   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4938   len = ai[owners[rank + 1]] - ai[owners[rank]];
4939   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
4940 
4941   current_space = free_space;
4942 
4943   /* determine symbolic info for each local row */
4944   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4945 
4946   for (k = 0; k < merge->nrecv; k++) {
4947     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4948     nrows       = *buf_ri_k[k];
4949     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4950     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4951   }
4952 
4953   MatPreallocateBegin(comm, m, n, dnz, onz);
4954   len = 0;
4955   for (i = 0; i < m; i++) {
4956     bnzi = 0;
4957     /* add local non-zero cols of this proc's seqmat into lnk */
4958     arow = owners[rank] + i;
4959     anzi = ai[arow + 1] - ai[arow];
4960     aj   = a->j + ai[arow];
4961     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
4962     bnzi += nlnk;
4963     /* add received col data into lnk */
4964     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4965       if (i == *nextrow[k]) {            /* i-th row */
4966         anzi = *(nextai[k] + 1) - *nextai[k];
4967         aj   = buf_rj[k] + *nextai[k];
4968         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
4969         bnzi += nlnk;
4970         nextrow[k]++;
4971         nextai[k]++;
4972       }
4973     }
4974     if (len < bnzi) len = bnzi; /* =max(bnzi) */
4975 
4976     /* if free space is not available, make more free space */
4977     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
4978     /* copy data into free space, then initialize lnk */
4979     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
4980     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
4981 
4982     current_space->array += bnzi;
4983     current_space->local_used += bnzi;
4984     current_space->local_remaining -= bnzi;
4985 
4986     bi[i + 1] = bi[i] + bnzi;
4987   }
4988 
4989   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
4990 
4991   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
4992   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
4993   PetscCall(PetscLLDestroy(lnk, lnkbt));
4994 
4995   /* create symbolic parallel matrix B_mpi */
4996   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
4997   PetscCall(MatCreate(comm, &B_mpi));
4998   if (n == PETSC_DECIDE) {
4999     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
5000   } else {
5001     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
5002   }
5003   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
5004   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
5005   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5006   MatPreallocateEnd(dnz, onz);
5007   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
5008 
5009   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5010   B_mpi->assembled = PETSC_FALSE;
5011   merge->bi        = bi;
5012   merge->bj        = bj;
5013   merge->buf_ri    = buf_ri;
5014   merge->buf_rj    = buf_rj;
5015   merge->coi       = NULL;
5016   merge->coj       = NULL;
5017   merge->owners_co = NULL;
5018 
5019   PetscCall(PetscCommDestroy(&comm));
5020 
5021   /* attach the supporting struct to B_mpi for reuse */
5022   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5023   PetscCall(PetscContainerSetPointer(container, merge));
5024   PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5025   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5026   PetscCall(PetscContainerDestroy(&container));
5027   *mpimat = B_mpi;
5028 
5029   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5030   PetscFunctionReturn(PETSC_SUCCESS);
5031 }
5032 
5033 /*@C
5034   MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5035   matrices from each processor
5036 
5037   Collective
5038 
5039   Input Parameters:
+ comm   - the communicator the parallel matrix will live on
. seqmat - the input sequential matrix
5042 . m      - number of local rows (or `PETSC_DECIDE`)
5043 . n      - number of local columns (or `PETSC_DECIDE`)
5044 - scall  - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5045 
5046   Output Parameter:
5047 . mpimat - the parallel matrix generated
5048 
5049   Level: advanced
5050 
5051   Note:
5052   The dimensions of the sequential matrix in each processor MUST be the same.
5053   The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5054   destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat.
5055 
5056 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
5057 @*/
5058 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5059 {
5060   PetscMPIInt size;
5061 
5062   PetscFunctionBegin;
5063   PetscCallMPI(MPI_Comm_size(comm, &size));
5064   if (size == 1) {
5065     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5066     if (scall == MAT_INITIAL_MATRIX) {
5067       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5068     } else {
5069       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5070     }
5071     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5072     PetscFunctionReturn(PETSC_SUCCESS);
5073   }
5074   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5075   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5076   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5077   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5078   PetscFunctionReturn(PETSC_SUCCESS);
5079 }
5080 
5081 /*@
5082   MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix.
5083 
5084   Not Collective
5085 
5086   Input Parameter:
5087 . A - the matrix
5088 
5089   Output Parameter:
5090 . A_loc - the local sequential matrix generated
5091 
5092   Level: developer
5093 
5094   Notes:
  The matrix is created by taking `A`'s local rows and putting them into a sequential matrix
  with `mlocal` rows and `n` columns, where `mlocal` is the local row count obtained with `MatGetLocalSize()` and
  `n` is the global column count obtained with `MatGetSize()`.
5098 
5099   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5100 
5101   For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.
5102 
5103   Destroy the matrix with `MatDestroy()`
5104 
5105 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5106 @*/
5107 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5108 {
5109   PetscBool mpi;
5110 
5111   PetscFunctionBegin;
5112   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5113   if (mpi) {
5114     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5115   } else {
5116     *A_loc = A;
5117     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5118   }
5119   PetscFunctionReturn(PETSC_SUCCESS);
5120 }
5121 
5122 /*@
5123   MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.
5124 
5125   Not Collective
5126 
5127   Input Parameters:
5128 + A     - the matrix
5129 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5130 
5131   Output Parameter:
5132 . A_loc - the local sequential matrix generated
5133 
5134   Level: developer
5135 
5136   Notes:
5137   The matrix is created by taking all `A`'s local rows and putting them into a sequential
5138   matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with
5139   `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.
5140 
5141   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5142 
5143   When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
5144   with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
5145   then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
5146   and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.
5147 
5148 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5149 @*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray; /* cmap: local off-diagonal column index -> global column index */
  const PetscScalar *aa, *ba, *aav, *bav;
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  /* Use a prefix match so derived types whose names begin with "mpiaij" are accepted as well */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* Uniprocessor: the diagonal block already holds the entire matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  a  = (Mat_SeqAIJ *)(mpimat->A)->data;
  b  = (Mat_SeqAIJ *)(mpimat->B)->data;
  /* CSR structure of the diagonal (a) and off-diagonal (b) blocks */
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  /* Keep the pristine array pointers (aav, bav) for the Restore calls; aa/ba are walked below */
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* Row pointers of the merged matrix: each row holds its diagonal plus off-diagonal entries */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal portion of A with global column < cstart: these precede the diagonal block
         in global column order, so emitting them first keeps each row's columns sorted */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A: shift local column indices to global by adding cstart */
      for (j = 0; j < ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* remaining off-diagonal portion of A (global column >= cstart + diagonal width) */
      for (j = jo; j < ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* Nonzero pattern is fixed on reuse: walk the same merge order and overwrite values only */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A preceding the diagonal block in global column order */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* remaining off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5254 
5255 /*@
5256   MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5257   mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part
5258 
5259   Not Collective
5260 
5261   Input Parameters:
5262 + A     - the matrix
5263 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5264 
5265   Output Parameters:
5266 + glob  - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5267 - A_loc - the local sequential matrix generated
5268 
5269   Level: developer
5270 
5271   Note:
5272   This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal
5273   part, then those associated with the off-diagonal part (in its local ordering)
5274 
5275 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5276 @*/
5277 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5278 {
5279   Mat             Ao, Ad;
5280   const PetscInt *cmap;
5281   PetscMPIInt     size;
5282   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5283 
5284   PetscFunctionBegin;
5285   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5286   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5287   if (size == 1) {
5288     if (scall == MAT_INITIAL_MATRIX) {
5289       PetscCall(PetscObjectReference((PetscObject)Ad));
5290       *A_loc = Ad;
5291     } else if (scall == MAT_REUSE_MATRIX) {
5292       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5293     }
5294     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5295     PetscFunctionReturn(PETSC_SUCCESS);
5296   }
5297   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5298   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5299   if (f) {
5300     PetscCall((*f)(A, scall, glob, A_loc));
5301   } else {
5302     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5303     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5304     Mat_SeqAIJ        *c;
5305     PetscInt          *ai = a->i, *aj = a->j;
5306     PetscInt          *bi = b->i, *bj = b->j;
5307     PetscInt          *ci, *cj;
5308     const PetscScalar *aa, *ba;
5309     PetscScalar       *ca;
5310     PetscInt           i, j, am, dn, on;
5311 
5312     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5313     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5314     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5315     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5316     if (scall == MAT_INITIAL_MATRIX) {
5317       PetscInt k;
5318       PetscCall(PetscMalloc1(1 + am, &ci));
5319       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5320       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5321       ci[0] = 0;
5322       for (i = 0, k = 0; i < am; i++) {
5323         const PetscInt ncols_o = bi[i + 1] - bi[i];
5324         const PetscInt ncols_d = ai[i + 1] - ai[i];
5325         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5326         /* diagonal portion of A */
5327         for (j = 0; j < ncols_d; j++, k++) {
5328           cj[k] = *aj++;
5329           ca[k] = *aa++;
5330         }
5331         /* off-diagonal portion of A */
5332         for (j = 0; j < ncols_o; j++, k++) {
5333           cj[k] = dn + *bj++;
5334           ca[k] = *ba++;
5335         }
5336       }
5337       /* put together the new matrix */
5338       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5339       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5340       /* Since these are PETSc arrays, change flags to free them as necessary. */
5341       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5342       c->free_a  = PETSC_TRUE;
5343       c->free_ij = PETSC_TRUE;
5344       c->nonew   = 0;
5345       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5346     } else if (scall == MAT_REUSE_MATRIX) {
5347       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5348       for (i = 0; i < am; i++) {
5349         const PetscInt ncols_d = ai[i + 1] - ai[i];
5350         const PetscInt ncols_o = bi[i + 1] - bi[i];
5351         /* diagonal portion of A */
5352         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5353         /* off-diagonal portion of A */
5354         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5355       }
5356       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5357     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5358     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5359     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa));
5360     if (glob) {
5361       PetscInt cst, *gidx;
5362 
5363       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5364       PetscCall(PetscMalloc1(dn + on, &gidx));
5365       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5366       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5367       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5368     }
5369   }
5370   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5371   PetscFunctionReturn(PETSC_SUCCESS);
5372 }
5373 
5374 /*@C
5375   MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5376 
5377   Not Collective
5378 
5379   Input Parameters:
5380 + A     - the matrix
5381 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5382 . row   - index set of rows to extract (or `NULL`)
5383 - col   - index set of columns to extract (or `NULL`)
5384 
5385   Output Parameter:
5386 . A_loc - the local sequential matrix generated
5387 
5388   Level: developer
5389 
5390 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5391 @*/
5392 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
5393 {
5394   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5395   PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
5396   IS          isrowa, iscola;
5397   Mat        *aloc;
5398   PetscBool   match;
5399 
5400   PetscFunctionBegin;
5401   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
5402   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5403   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5404   if (!row) {
5405     start = A->rmap->rstart;
5406     end   = A->rmap->rend;
5407     PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
5408   } else {
5409     isrowa = *row;
5410   }
5411   if (!col) {
5412     start = A->cmap->rstart;
5413     cmap  = a->garray;
5414     nzA   = a->A->cmap->n;
5415     nzB   = a->B->cmap->n;
5416     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5417     ncols = 0;
5418     for (i = 0; i < nzB; i++) {
5419       if (cmap[i] < start) idx[ncols++] = cmap[i];
5420       else break;
5421     }
5422     imark = i;
5423     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
5424     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
5425     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
5426   } else {
5427     iscola = *col;
5428   }
5429   if (scall != MAT_INITIAL_MATRIX) {
5430     PetscCall(PetscMalloc1(1, &aloc));
5431     aloc[0] = *A_loc;
5432   }
5433   PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
5434   if (!col) { /* attach global id of condensed columns */
5435     PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
5436   }
5437   *A_loc = aloc[0];
5438   PetscCall(PetscFree(aloc));
5439   if (!row) PetscCall(ISDestroy(&isrowa));
5440   if (!col) PetscCall(ISDestroy(&iscola));
5441   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5442   PetscFunctionReturn(PETSC_SUCCESS);
5443 }
5444 
/*
 * Create a sequential AIJ matrix based on row indices: a whole row is extracted once its index is matched.
 * Rows may be local or remote. The routine is designed to be memory scalable, so nothing is allocated based
 * on a global size.
 * */
static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
{
  Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth;
  PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt            owner;
  PetscSFNode           *iremote, *oiremote;
  const PetscInt        *lrowindices;
  PetscSF                sf, osf;
  PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar     *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots (locally owned rows of P)
   * nrows is the number of leaves (requested rows, local or remote)
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  /* Per owned row, record (diag count, off-diag count) pairs plus running prefix offsets */
  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i = 0; i < plocalsize; i++) {
    /* diagonal */
    nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
    /* off-diagonal */
    nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* prefix sums: relative location of each row's entries within pd->a and po->a */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf; MPIU_2INT moves each (diag, off-diag) pair as one unit */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i = 0; i < nrows; i++) {
    /* total nonzeros per requested row; ncol tracks the maximum (used as the column bound below) */
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol    = PetscMax(pnnz[i], ncol);
    /* diagonal */
    dntotalcols += nlcols[i * 2 + 0];
    /* off-diagonal */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* One SF graph per source block: leaves address individual nonzeros now, not rows */
  /* diagonal */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diagonal */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so ilocal needs to point into its single contiguous storage;
         diag entries of a row are placed before its off-diag entries */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off-diagonal */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++]      = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* off-diagonal */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* operate on the matrix internal data to save memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for diag matrix (shifted in place, undone after the Bcast) */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  /* Undo the in-place local->global conversion of po->j; every index must map back */
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5618 
/*
 * Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A.
 * This supports MPIAIJ and MAIJ matrices.
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp;
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   *  and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys; dividing by dof collapses the dof
       consecutive columns of a MAIJ matrix onto a single row of P (dof == 1 for plain MPIAIJ) */
    PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      key = a->garray[i] / dof;
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same key as the previous step (sortedness assumption) */
        mapping[i] = count - 1;
      }
    }
    /* map: off-diagonal column of A -> row index in the condensed P_oth */
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
    PetscCall(PetscHMapIGetSize(hamp, &htsize));
    PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
    /* Extract the unique keys and sort them: these are the rows of P to fetch */
    PetscCall(PetscCalloc1(htsize, &rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    PetscCall(PetscSortInt(htsize, rowindices));
    PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
    /* In case, the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that as attached to the matrix earlier.
     */
    const PetscScalar *pd_a, *po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
    PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
    /* Update values in place: both SFs scatter into p_oth->a, at disjoint positions */
    PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5695 
5696 /*@C
  MatGetBrowsOfAcols - Returns an `IS` containing the rows of `B` that correspond to the nonzero columns of the local part of `A`
5698 
5699   Collective
5700 
5701   Input Parameters:
5702 + A     - the first matrix in `MATMPIAIJ` format
5703 . B     - the second matrix in `MATMPIAIJ` format
5704 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5705 
5706   Output Parameters:
5707 + rowb  - On input index sets of rows of B to extract (or `NULL`), modified on output
5708 . colb  - On input index sets of columns of B to extract (or `NULL`), modified on output
5709 - B_seq - the sequential matrix generated
5710 
5711   Level: developer
5712 
5713 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
5714 @*/
PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
  IS          isrowb, iscolb;
  Mat        *bseq = NULL;

  PetscFunctionBegin;
  /* The parallel layout of A's columns must match the layout of B's rows so that "columns of A" index "rows of B" */
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));

  if (scall == MAT_INITIAL_MATRIX) {
    /* Build a sorted list of the global column indices with nonzeros in the local part of A:
       off-diagonal columns below the diagonal block (a->garray is sorted ascending), then every
       diagonal-block column, then the remaining off-diagonal columns. */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) { /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    /* ownership of idx passes to the IS (PETSC_OWN_POINTER); all columns of B are kept */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
  } else {
    PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb;
    iscolb = *colb;
    /* MatCreateSubMatrices() takes an array of matrices on reuse, so wrap the caller's *B_seq in a length-1 array */
    PetscCall(PetscMalloc1(1, &bseq));
    bseq[0] = *B_seq;
  }
  PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
  *B_seq = bseq[0];
  PetscCall(PetscFree(bseq));
  /* Hand the index sets back to the caller when requested, otherwise destroy them here */
  if (!rowb) {
    PetscCall(ISDestroy(&isrowb));
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    PetscCall(ISDestroy(&iscolb));
  } else {
    *colb = iscolb;
  }
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5766 
5767 /*
5768     MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns
5769     of the OFF-DIAGONAL portion of local A
5770 
5771     Collective
5772 
5773    Input Parameters:
5774 +    A,B - the matrices in `MATMPIAIJ` format
5775 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5776 
5777    Output Parameter:
5778 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5779 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5780 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5781 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5782 
5783     Developer Note:
5784     This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable.
5786 
5787     Level: developer
5788 
5789 */
5790 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5791 {
5792   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5793   Mat_SeqAIJ        *b_oth;
5794   VecScatter         ctx;
5795   MPI_Comm           comm;
5796   const PetscMPIInt *rprocs, *sprocs;
5797   const PetscInt    *srow, *rstarts, *sstarts;
5798   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5799   PetscInt           i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len;
5800   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5801   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5802   PetscMPIInt        size, tag, rank, nreqs;
5803 
5804   PetscFunctionBegin;
5805   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5806   PetscCallMPI(MPI_Comm_size(comm, &size));
5807 
5808   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5809              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5810   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5811   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5812 
5813   if (size == 1) {
5814     startsj_s = NULL;
5815     bufa_ptr  = NULL;
5816     *B_oth    = NULL;
5817     PetscFunctionReturn(PETSC_SUCCESS);
5818   }
5819 
5820   ctx = a->Mvctx;
5821   tag = ((PetscObject)ctx)->tag;
5822 
5823   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5824   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5825   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5826   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5827   PetscCall(PetscMalloc1(nreqs, &reqs));
5828   rwaits = reqs;
5829   swaits = reqs + nrecvs;
5830 
5831   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5832   if (scall == MAT_INITIAL_MATRIX) {
5833     /* i-array */
5834     /*  post receives */
5835     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5836     for (i = 0; i < nrecvs; i++) {
5837       rowlen = rvalues + rstarts[i] * rbs;
5838       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5839       PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5840     }
5841 
5842     /* pack the outgoing message */
5843     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5844 
5845     sstartsj[0] = 0;
5846     rstartsj[0] = 0;
5847     len         = 0; /* total length of j or a array to be sent */
5848     if (nsends) {
5849       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5850       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5851     }
5852     for (i = 0; i < nsends; i++) {
5853       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5854       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5855       for (j = 0; j < nrows; j++) {
5856         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5857         for (l = 0; l < sbs; l++) {
5858           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5859 
5860           rowlen[j * sbs + l] = ncols;
5861 
5862           len += ncols;
5863           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5864         }
5865         k++;
5866       }
5867       PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5868 
5869       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5870     }
5871     /* recvs and sends of i-array are completed */
5872     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5873     PetscCall(PetscFree(svalues));
5874 
5875     /* allocate buffers for sending j and a arrays */
5876     PetscCall(PetscMalloc1(len + 1, &bufj));
5877     PetscCall(PetscMalloc1(len + 1, &bufa));
5878 
5879     /* create i-array of B_oth */
5880     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5881 
5882     b_othi[0] = 0;
5883     len       = 0; /* total length of j or a array to be received */
5884     k         = 0;
5885     for (i = 0; i < nrecvs; i++) {
5886       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5887       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5888       for (j = 0; j < nrows; j++) {
5889         b_othi[k + 1] = b_othi[k] + rowlen[j];
5890         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5891         k++;
5892       }
5893       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5894     }
5895     PetscCall(PetscFree(rvalues));
5896 
5897     /* allocate space for j and a arrays of B_oth */
5898     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5899     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5900 
5901     /* j-array */
5902     /*  post receives of j-array */
5903     for (i = 0; i < nrecvs; i++) {
5904       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5905       PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5906     }
5907 
5908     /* pack the outgoing message j-array */
5909     if (nsends) k = sstarts[0];
5910     for (i = 0; i < nsends; i++) {
5911       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5912       bufJ  = bufj + sstartsj[i];
5913       for (j = 0; j < nrows; j++) {
5914         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5915         for (ll = 0; ll < sbs; ll++) {
5916           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5917           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5918           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5919         }
5920       }
5921       PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5922     }
5923 
5924     /* recvs and sends of j-array are completed */
5925     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5926   } else if (scall == MAT_REUSE_MATRIX) {
5927     sstartsj = *startsj_s;
5928     rstartsj = *startsj_r;
5929     bufa     = *bufa_ptr;
5930     b_oth    = (Mat_SeqAIJ *)(*B_oth)->data;
5931     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5932   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5933 
5934   /* a-array */
5935   /*  post receives of a-array */
5936   for (i = 0; i < nrecvs; i++) {
5937     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5938     PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
5939   }
5940 
5941   /* pack the outgoing message a-array */
5942   if (nsends) k = sstarts[0];
5943   for (i = 0; i < nsends; i++) {
5944     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5945     bufA  = bufa + sstartsj[i];
5946     for (j = 0; j < nrows; j++) {
5947       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5948       for (ll = 0; ll < sbs; ll++) {
5949         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5950         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
5951         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5952       }
5953     }
5954     PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
5955   }
5956   /* recvs and sends of a-array are completed */
5957   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5958   PetscCall(PetscFree(reqs));
5959 
5960   if (scall == MAT_INITIAL_MATRIX) {
5961     /* put together the new matrix */
5962     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
5963 
5964     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5965     /* Since these are PETSc arrays, change flags to free them as necessary. */
5966     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5967     b_oth->free_a  = PETSC_TRUE;
5968     b_oth->free_ij = PETSC_TRUE;
5969     b_oth->nonew   = 0;
5970 
5971     PetscCall(PetscFree(bufj));
5972     if (!startsj_s || !bufa_ptr) {
5973       PetscCall(PetscFree2(sstartsj, rstartsj));
5974       PetscCall(PetscFree(bufa_ptr));
5975     } else {
5976       *startsj_s = sstartsj;
5977       *startsj_r = rstartsj;
5978       *bufa_ptr  = bufa;
5979     }
5980   } else if (scall == MAT_REUSE_MATRIX) {
5981     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
5982   }
5983 
5984   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
5985   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
5986   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
5987   PetscFunctionReturn(PETSC_SUCCESS);
5988 }
5989 
5990 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
5991 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
5992 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
5993 #if defined(PETSC_HAVE_MKL_SPARSE)
5994 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
5995 #endif
5996 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
5997 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
5998 #if defined(PETSC_HAVE_ELEMENTAL)
5999 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
6000 #endif
6001 #if defined(PETSC_HAVE_SCALAPACK)
6002 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6003 #endif
6004 #if defined(PETSC_HAVE_HYPRE)
6005 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
6006 #endif
6007 #if defined(PETSC_HAVE_CUDA)
6008 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
6009 #endif
6010 #if defined(PETSC_HAVE_HIP)
6011 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
6012 #endif
6013 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6014 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6015 #endif
6016 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6017 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6018 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6019 
6020 /*
6021     Computes (B'*A')' since computing B*A directly is untenable
6022 
6023                n                       p                          p
6024         [             ]       [             ]         [                 ]
6025       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6026         [             ]       [             ]         [                 ]
6027 
6028 */
6029 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6030 {
6031   Mat At, Bt, Ct;
6032 
6033   PetscFunctionBegin;
6034   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6035   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6036   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
6037   PetscCall(MatDestroy(&At));
6038   PetscCall(MatDestroy(&Bt));
6039   PetscCall(MatTransposeSetPrecursor(Ct, C));
6040   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6041   PetscCall(MatDestroy(&Ct));
6042   PetscFunctionReturn(PETSC_SUCCESS);
6043 }
6044 
6045 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6046 {
6047   PetscBool cisdense;
6048 
6049   PetscFunctionBegin;
6050   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6051   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6052   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6053   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6054   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6055   PetscCall(MatSetUp(C));
6056 
6057   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6058   PetscFunctionReturn(PETSC_SUCCESS);
6059 }
6060 
6061 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6062 {
6063   Mat_Product *product = C->product;
6064   Mat          A = product->A, B = product->B;
6065 
6066   PetscFunctionBegin;
6067   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
6068              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6069   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6070   C->ops->productsymbolic = MatProductSymbolic_AB;
6071   PetscFunctionReturn(PETSC_SUCCESS);
6072 }
6073 
6074 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6075 {
6076   Mat_Product *product = C->product;
6077 
6078   PetscFunctionBegin;
6079   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6080   PetscFunctionReturn(PETSC_SUCCESS);
6081 }
6082 
6083 /*
6084    Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6085 
6086   Input Parameters:
6087 
6088     j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
6089     j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)
6090 
6091     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6092 
6093     For Set1, j1[] contains column indices of the nonzeros.
6094     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6096     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6097 
6098     Similar for Set2.
6099 
6100     This routine merges the two sets of nonzeros row by row and removes repeats.
6101 
6102   Output Parameters: (memory is allocated by the caller)
6103 
6104     i[],j[]: the CSR of the merged matrix, which has m rows.
6105     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6106     imap2[]: similar to imap1[], but for Set2.
6107     Note we order nonzeros row-by-row and from left to right.
6108 */
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m; /* Row index of mat */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Running counts of unique nonzeros in Set1, Set2 and the merged matrix respectively */
  i[0]        = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    /* Classic two-pointer merge of the (sorted) column indices of row r; jmap increments
       let us skip over the repeated occurrences of each unique nonzero in one jump */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) {
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else {
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    i[r + 1] = t; /* CSR row pointer of the merged matrix */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
6166 
6167 /*
6168   Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6169 
6170   Input Parameters:
6171     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6172     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6173       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6174 
6175       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6176       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6177 
6178   Output Parameters:
6179     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6180     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6181       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6182       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6183 
6184     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6185       Atot: number of entries belonging to the diagonal block.
6186       Annz: number of unique nonzeros belonging to the diagonal block.
6187       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6188         repeats (i.e., same 'i,j' pair).
      Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
        is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.

    Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6196 
6197     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6198 */
static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
{
  PetscInt    cstart, cend, rstart, rend, row, col;
  PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  k, m, p, q, r, s, mid;
  PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  m = rend - rstart;

  /* Skip negative rows (entries flagged to be ignored by the caller) */
  for (k = 0; k < n; k++)
    if (i[k] >= 0) break;

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k < n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s = k; s < n; s++)
      if (i[s] != row) break;

    /* Shift diag columns to range of [-PETSC_MAX_INT, -1] so one sort places them before all offdiag columns */
    for (p = k; p < s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT;
      /* NOTE(review): the bound below admits j[p] == mat->cmap->N, while valid global columns are [0, N) -- confirm whether <= is intentional */
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
    }
    /* perm[] is permuted along with j[] so we can later locate each entry's original value */
    PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
    PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row - rstart] = k;
    rowMid[row - rstart]   = mid;
    rowEnd[row - rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag row */
    for (p = k; p < mid;) {
      col = j[p];
      do {
        j[p] += PETSC_MAX_INT; /* Revert the modified diagonal indices */
        p++;
      } while (p < mid && j[p] == col);
      Annz++;
    }

    /* Count unique nonzeros of this offdiag row */
    for (p = mid; p < s;) {
      col = j[p];
      do {
        p++;
      } while (p < s && j[p] == col);
      Bnnz++;
    }
    k = s;
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot, &Aperm));
  PetscCall(PetscMalloc1(Btot, &Bperm));
  PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; /* reuse the counters as running offsets for the second pass */
  for (r = 0; r < m; r++) {
    k   = rowBegin[r];
    mid = rowMid[r];
    s   = rowEnd[r];
    PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k));
    PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p = k; p < mid;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < mid && j[p] == col);
      Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < s && j[p] == col);
      Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(PETSC_SUCCESS);
}
6310 
6311 /*
6312   Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6313 
6314   Input Parameters:
6315     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6316     nnz:  number of unique nonzeros in the merged matrix
6317     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6318     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6319 
6320   Output Parameter: (memory is allocated by the caller)
6321     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6322 
6323   Example:
6324     nnz1 = 4
6325     nnz  = 6
6326     imap = [1,3,4,5]
6327     jmap = [0,3,5,6,7]
6328    then,
6329     jmap_new = [0,0,3,3,5,6,7]
6330 */
6331 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6332 {
6333   PetscCount k, p;
6334 
6335   PetscFunctionBegin;
6336   jmap_new[0] = 0;
6337   p           = nnz;                /* p loops over jmap_new[] backwards */
6338   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6339     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6340   }
6341   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6342   PetscFunctionReturn(PETSC_SUCCESS);
6343 }
6344 
6345 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data)
6346 {
6347   MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data;
6348 
6349   PetscFunctionBegin;
6350   PetscCall(PetscSFDestroy(&coo->sf));
6351   PetscCall(PetscFree(coo->Aperm1));
6352   PetscCall(PetscFree(coo->Bperm1));
6353   PetscCall(PetscFree(coo->Ajmap1));
6354   PetscCall(PetscFree(coo->Bjmap1));
6355   PetscCall(PetscFree(coo->Aimap2));
6356   PetscCall(PetscFree(coo->Bimap2));
6357   PetscCall(PetscFree(coo->Aperm2));
6358   PetscCall(PetscFree(coo->Bperm2));
6359   PetscCall(PetscFree(coo->Ajmap2));
6360   PetscCall(PetscFree(coo->Bjmap2));
6361   PetscCall(PetscFree(coo->Cperm1));
6362   PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
6363   PetscCall(PetscFree(coo));
6364   PetscFunctionReturn(PETSC_SUCCESS);
6365 }
6366 
6367 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6368 {
6369   MPI_Comm             comm;
6370   PetscMPIInt          rank, size;
6371   PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6372   PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
6373   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6374   PetscContainer       container;
6375   MatCOOStruct_MPIAIJ *coo;
6376 
6377   PetscFunctionBegin;
6378   PetscCall(PetscFree(mpiaij->garray));
6379   PetscCall(VecDestroy(&mpiaij->lvec));
6380 #if defined(PETSC_USE_CTABLE)
6381   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6382 #else
6383   PetscCall(PetscFree(mpiaij->colmap));
6384 #endif
6385   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6386   mat->assembled     = PETSC_FALSE;
6387   mat->was_assembled = PETSC_FALSE;
6388 
6389   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6390   PetscCallMPI(MPI_Comm_size(comm, &size));
6391   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6392   PetscCall(PetscLayoutSetUp(mat->rmap));
6393   PetscCall(PetscLayoutSetUp(mat->cmap));
6394   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6395   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6396   PetscCall(MatGetLocalSize(mat, &m, &n));
6397   PetscCall(MatGetSize(mat, &M, &N));
6398 
6399   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6400   /* entries come first, then local rows, then remote rows.                     */
6401   PetscCount n1 = coo_n, *perm1;
6402   PetscInt  *i1 = coo_i, *j1 = coo_j;
6403 
6404   PetscCall(PetscMalloc1(n1, &perm1));
6405   for (k = 0; k < n1; k++) perm1[k] = k;
6406 
6407   /* Manipulate indices so that entries with negative row or col indices will have smallest
6408      row indices, local entries will have greater but negative row indices, and remote entries
6409      will have positive row indices.
6410   */
6411   for (k = 0; k < n1; k++) {
6412     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT;                /* e.g., -2^31, minimal to move them ahead */
6413     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6414     else {
6415       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6416       if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6417     }
6418   }
6419 
6420   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6421   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6422 
6423   /* Advance k to the first entry we need to take care of */
6424   for (k = 0; k < n1; k++)
6425     if (i1[k] > PETSC_MIN_INT) break;
6426   PetscInt i1start = k;
6427 
6428   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */
6429   for (; k < rem; k++) i1[k] += PETSC_MAX_INT;                                    /* Revert row indices of local rows*/
6430 
6431   /*           Send remote rows to their owner                                  */
6432   /* Find which rows should be sent to which remote ranks*/
6433   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6434   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6435   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6436   const PetscInt *ranges;
6437   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6438 
6439   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6440   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6441   for (k = rem; k < n1;) {
6442     PetscMPIInt owner;
6443     PetscInt    firstRow, lastRow;
6444 
6445     /* Locate a row range */
6446     firstRow = i1[k]; /* first row of this owner */
6447     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6448     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6449 
6450     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6451     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6452 
6453     /* All entries in [k,p) belong to this remote owner */
6454     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6455       PetscMPIInt *sendto2;
6456       PetscInt    *nentries2;
6457       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6458 
6459       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6460       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6461       PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1));
6462       PetscCall(PetscFree2(sendto, nentries2));
6463       sendto   = sendto2;
6464       nentries = nentries2;
6465       maxNsend = maxNsend2;
6466     }
6467     sendto[nsend]   = owner;
6468     nentries[nsend] = p - k;
6469     PetscCall(PetscCountCast(p - k, &nentries[nsend]));
6470     nsend++;
6471     k = p;
6472   }
6473 
6474   /* Build 1st SF to know offsets on remote to send data */
6475   PetscSF      sf1;
6476   PetscInt     nroots = 1, nroots2 = 0;
6477   PetscInt     nleaves = nsend, nleaves2 = 0;
6478   PetscInt    *offsets;
6479   PetscSFNode *iremote;
6480 
6481   PetscCall(PetscSFCreate(comm, &sf1));
6482   PetscCall(PetscMalloc1(nsend, &iremote));
6483   PetscCall(PetscMalloc1(nsend, &offsets));
6484   for (k = 0; k < nsend; k++) {
6485     iremote[k].rank  = sendto[k];
6486     iremote[k].index = 0;
6487     nleaves2 += nentries[k];
6488     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6489   }
6490   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6491   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6492   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6493   PetscCall(PetscSFDestroy(&sf1));
6494   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem);
6495 
6496   /* Build 2nd SF to send remote COOs to their owner */
6497   PetscSF sf2;
6498   nroots  = nroots2;
6499   nleaves = nleaves2;
6500   PetscCall(PetscSFCreate(comm, &sf2));
6501   PetscCall(PetscSFSetFromOptions(sf2));
6502   PetscCall(PetscMalloc1(nleaves, &iremote));
6503   p = 0;
6504   for (k = 0; k < nsend; k++) {
6505     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6506     for (q = 0; q < nentries[k]; q++, p++) {
6507       iremote[p].rank  = sendto[k];
6508       iremote[p].index = offsets[k] + q;
6509     }
6510   }
6511   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6512 
6513   /* Send the remote COOs to their owner */
6514   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6515   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6516   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6517   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6518   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE));
6519   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6520   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE));
6521 
6522   PetscCall(PetscFree(offsets));
6523   PetscCall(PetscFree2(sendto, nentries));
6524 
6525   /* Sort received COOs by row along with the permutation array     */
6526   for (k = 0; k < n2; k++) perm2[k] = k;
6527   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6528 
6529   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6530   PetscCount *Cperm1;
6531   PetscCall(PetscMalloc1(nleaves, &Cperm1));
6532   PetscCall(PetscArraycpy(Cperm1, perm1 + rem, nleaves));
6533 
6534   /* Support for HYPRE matrices, kind of a hack.
6535      Swap min column with diagonal so that diagonal values will go first */
6536   PetscBool   hypre;
6537   const char *name;
6538   PetscCall(PetscObjectGetName((PetscObject)mat, &name));
6539   PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", name, &hypre));
6540   if (hypre) {
6541     PetscInt *minj;
6542     PetscBT   hasdiag;
6543 
6544     PetscCall(PetscBTCreate(m, &hasdiag));
6545     PetscCall(PetscMalloc1(m, &minj));
6546     for (k = 0; k < m; k++) minj[k] = PETSC_MAX_INT;
6547     for (k = i1start; k < rem; k++) {
6548       if (j1[k] < cstart || j1[k] >= cend) continue;
6549       const PetscInt rindex = i1[k] - rstart;
6550       if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6551       minj[rindex] = PetscMin(minj[rindex], j1[k]);
6552     }
6553     for (k = 0; k < n2; k++) {
6554       if (j2[k] < cstart || j2[k] >= cend) continue;
6555       const PetscInt rindex = i2[k] - rstart;
6556       if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6557       minj[rindex] = PetscMin(minj[rindex], j2[k]);
6558     }
6559     for (k = i1start; k < rem; k++) {
6560       const PetscInt rindex = i1[k] - rstart;
6561       if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6562       if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart);
6563       else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex];
6564     }
6565     for (k = 0; k < n2; k++) {
6566       const PetscInt rindex = i2[k] - rstart;
6567       if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6568       if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart);
6569       else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex];
6570     }
6571     PetscCall(PetscBTDestroy(&hasdiag));
6572     PetscCall(PetscFree(minj));
6573   }
6574 
6575   /* Split local COOs and received COOs into diag/offdiag portions */
6576   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6577   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1;
6578   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6579   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6580   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6581   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6582 
6583   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6584   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6585   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6586   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6587 
6588   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6589   PetscInt *Ai, *Bi;
6590   PetscInt *Aj, *Bj;
6591 
6592   PetscCall(PetscMalloc1(m + 1, &Ai));
6593   PetscCall(PetscMalloc1(m + 1, &Bi));
6594   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6595   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6596 
6597   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6598   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6599   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6600   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6601   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6602 
6603   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6604   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6605 
6606   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6607   /* expect nonzeros in A/B most likely have local contributing entries        */
6608   PetscInt    Annz = Ai[m];
6609   PetscInt    Bnnz = Bi[m];
6610   PetscCount *Ajmap1_new, *Bjmap1_new;
6611 
6612   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6613   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6614 
6615   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6616   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6617 
6618   PetscCall(PetscFree(Aimap1));
6619   PetscCall(PetscFree(Ajmap1));
6620   PetscCall(PetscFree(Bimap1));
6621   PetscCall(PetscFree(Bjmap1));
6622   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6623   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6624   PetscCall(PetscFree(perm1));
6625   PetscCall(PetscFree3(i2, j2, perm2));
6626 
6627   Ajmap1 = Ajmap1_new;
6628   Bjmap1 = Bjmap1_new;
6629 
6630   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6631   if (Annz < Annz1 + Annz2) {
6632     PetscInt *Aj_new;
6633     PetscCall(PetscMalloc1(Annz, &Aj_new));
6634     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6635     PetscCall(PetscFree(Aj));
6636     Aj = Aj_new;
6637   }
6638 
6639   if (Bnnz < Bnnz1 + Bnnz2) {
6640     PetscInt *Bj_new;
6641     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6642     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6643     PetscCall(PetscFree(Bj));
6644     Bj = Bj_new;
6645   }
6646 
6647   /* Create new submatrices for on-process and off-process coupling                  */
6648   PetscScalar     *Aa, *Ba;
6649   MatType          rtype;
6650   Mat_SeqAIJ      *a, *b;
6651   PetscObjectState state;
6652   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6653   PetscCall(PetscCalloc1(Bnnz, &Ba));
6654   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6655   if (cstart) {
6656     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6657   }
6658   PetscCall(MatDestroy(&mpiaij->A));
6659   PetscCall(MatDestroy(&mpiaij->B));
6660   PetscCall(MatGetRootType_Private(mat, &rtype));
6661   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6662   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6663   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6664   mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ
6665   state              = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate;
6666   PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
6667 
6668   a               = (Mat_SeqAIJ *)mpiaij->A->data;
6669   b               = (Mat_SeqAIJ *)mpiaij->B->data;
6670   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6671   a->free_a = b->free_a = PETSC_TRUE;
6672   a->free_ij = b->free_ij = PETSC_TRUE;
6673 
6674   /* conversion must happen AFTER multiply setup */
6675   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6676   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6677   PetscCall(VecDestroy(&mpiaij->lvec));
6678   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6679 
6680   // Put the COO struct in a container and then attach that to the matrix
6681   PetscCall(PetscMalloc1(1, &coo));
6682   coo->n       = coo_n;
6683   coo->sf      = sf2;
6684   coo->sendlen = nleaves;
6685   coo->recvlen = nroots;
6686   coo->Annz    = Annz;
6687   coo->Bnnz    = Bnnz;
6688   coo->Annz2   = Annz2;
6689   coo->Bnnz2   = Bnnz2;
6690   coo->Atot1   = Atot1;
6691   coo->Atot2   = Atot2;
6692   coo->Btot1   = Btot1;
6693   coo->Btot2   = Btot2;
6694   coo->Ajmap1  = Ajmap1;
6695   coo->Aperm1  = Aperm1;
6696   coo->Bjmap1  = Bjmap1;
6697   coo->Bperm1  = Bperm1;
6698   coo->Aimap2  = Aimap2;
6699   coo->Ajmap2  = Ajmap2;
6700   coo->Aperm2  = Aperm2;
6701   coo->Bimap2  = Bimap2;
6702   coo->Bjmap2  = Bjmap2;
6703   coo->Bperm2  = Bperm2;
6704   coo->Cperm1  = Cperm1;
6705   // Allocate in preallocation. If not used, it has zero cost on host
6706   PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
6707   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
6708   PetscCall(PetscContainerSetPointer(container, coo));
6709   PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ));
6710   PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
6711   PetscCall(PetscContainerDestroy(&container));
6712   PetscFunctionReturn(PETSC_SUCCESS);
6713 }
6714 
/* MatSetValuesCOO_MPIAIJ - insert (imode == INSERT_VALUES) or add (imode == ADD_VALUES) the values v[],
   ordered like the (coo_i, coo_j) list given to MatSetPreallocationCOO_MPIAIJ(), into the matrix.
   Uses the maps/permutations stored in the "__PETSc_MatCOOStruct_Host" container built at preallocation time. */
static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
{
  Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
  Mat                  A = mpiaij->A, B = mpiaij->B;
  PetscScalar         *Aa, *Ba;
  PetscScalar         *sendbuf, *recvbuf;
  const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
  const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
  const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
  const PetscCount    *Cperm1;
  PetscContainer       container;
  MatCOOStruct_MPIAIJ *coo;

  PetscFunctionBegin;
  /* Retrieve the COO metadata attached by MatSetPreallocationCOO_MPIAIJ(); error if preallocation was never done */
  PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix");
  PetscCall(PetscContainerGetPointer(container, (void **)&coo));
  sendbuf = coo->sendbuf;
  recvbuf = coo->recvbuf;
  Ajmap1  = coo->Ajmap1;
  Ajmap2  = coo->Ajmap2;
  Aimap2  = coo->Aimap2;
  Bjmap1  = coo->Bjmap1;
  Bjmap2  = coo->Bjmap2;
  Bimap2  = coo->Bimap2;
  Aperm1  = coo->Aperm1;
  Aperm2  = coo->Aperm2;
  Bperm1  = coo->Bperm1;
  Bperm2  = coo->Bperm2;
  Cperm1  = coo->Cperm1;

  PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
  PetscCall(MatSeqAIJGetArray(B, &Ba));

  /* Pack entries to be sent to remote */
  for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];

  /* Send remote entries to their owner and overlap the communication with local computation */
  PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
  /* Add local entries to A and B */
  for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
    PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
    for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
    Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
  }
  for (PetscCount i = 0; i < coo->Bnnz; i++) {
    PetscScalar sum = 0.0;
    for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
    Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
  }
  PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));

  /* Add received remote entries to A and B */
  /* Note remote contributions are always ADDED: the INSERT_VALUES zeroing already happened in the local pass above */
  for (PetscCount i = 0; i < coo->Annz2; i++) {
    for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
  }
  for (PetscCount i = 0; i < coo->Bnnz2; i++) {
    for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
  }
  PetscCall(MatSeqAIJRestoreArray(A, &Aa));
  PetscCall(MatSeqAIJRestoreArray(B, &Ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}
6778 
6779 /*MC
6780    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6781 
6782    Options Database Keys:
6783 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6784 
6785    Level: beginner
6786 
6787    Notes:
6788    `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values,
6789     in this case the values associated with the rows and columns one passes in are set to zero
6790     in the matrix
6791 
    `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
6794 
6795 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6796 M*/
/* MatCreate_MPIAIJ - type constructor for MATMPIAIJ; allocates the Mat_MPIAIJ data structure,
   installs the function table, and registers the composed-function hooks (preallocation,
   store/retrieve values, and the MatConvert_/MatProductSetFromOptions_ entry points for
   the back-end and format conversions enabled at configure time). */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  PetscCall(PetscNew(&b));
  B->data       = (void *)b;
  B->ops[0]     = MatOps_Values; /* struct assignment: copy the whole MPIAIJ function table */
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL; /* global-to-local column map; built lazily on first use */
  b->garray      = NULL; /* global column indices of the off-diagonal block; built at assembly */
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(PETSC_SUCCESS);
}
6879 
6880 /*@C
6881   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6882   and "off-diagonal" part of the matrix in CSR format.
6883 
6884   Collective
6885 
6886   Input Parameters:
6887 + comm - MPI communicator
6888 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
6889 . n    - This value should be the same as the local size used in creating the
6890        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
6891        calculated if `N` is given) For square matrices `n` is almost always `m`.
6892 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
6893 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
6894 . i    - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6895 . j    - column indices, which must be local, i.e., based off the start column of the diagonal portion
6896 . a    - matrix values
6897 . oi   - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6898 . oj   - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
6899 - oa   - matrix values
6900 
6901   Output Parameter:
6902 . mat - the matrix
6903 
6904   Level: advanced
6905 
6906   Notes:
6907   The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6908   must free the arrays once the matrix has been destroyed and not before.
6909 
6910   The `i` and `j` indices are 0 based
6911 
6912   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix
6913 
6914   This sets local rows and cannot be used to set off-processor values.
6915 
6916   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6917   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6918   not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6919   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6920   keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6921   communication if it is known that only local entries will be set.
6922 
6923 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6924           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6925 @*/
/* See the manual page in the /*@C block above for the full contract; the i,j,a and oi,oj,oa arrays are NOT copied
   and must outlive the matrix. */
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
{
  Mat_MPIAIJ *maij;

  PetscFunctionBegin;
  /* Basic sanity checks on the user-supplied CSR arrays; deeper validation happens in MatCreateSeqAIJWithArrays() */
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  /* Mark preallocated so MatSetType does not expect a later preallocation call */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Wrap the user arrays directly: A holds the "diagonal" block (local columns), B the "off-diagonal" block (global columns) */
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));

  /* Assembly only compresses B's column space and builds the scatter; no values move between ranks */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
6954 
/* Product data for back-end (e.g. device) MPIAIJ matrix-matrix products; stored in C->product->data
   and released by MatDestroy_MatMatMPIAIJBACKEND() */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt   cp;    /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r;
  PetscScalar *bufa;
  Mat          P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;            /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;

  /* customization */
  PetscBool abmerge;
  PetscBool P_oth_bind;
} MatMatMPIAIJBACKEND;
6985 
6986 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6987 {
6988   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
6989   PetscInt             i;
6990 
6991   PetscFunctionBegin;
6992   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
6993   PetscCall(PetscFree(mmdata->bufa));
6994   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
6995   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
6996   PetscCall(MatDestroy(&mmdata->P_oth));
6997   PetscCall(MatDestroy(&mmdata->Bloc));
6998   PetscCall(PetscSFDestroy(&mmdata->sf));
6999   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
7000   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
7001   PetscCall(PetscFree(mmdata->own[0]));
7002   PetscCall(PetscFree(mmdata->own));
7003   PetscCall(PetscFree(mmdata->off[0]));
7004   PetscCall(PetscFree(mmdata->off));
7005   PetscCall(PetscFree(mmdata));
7006   PetscFunctionReturn(PETSC_SUCCESS);
7007 }
7008 
7009 /* Copy selected n entries with indices in idx[] of A to v[].
7010    If idx is NULL, copy the whole data array of A to v[]
7011  */
7012 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
7013 {
7014   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
7015 
7016   PetscFunctionBegin;
7017   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7018   if (f) {
7019     PetscCall((*f)(A, n, idx, v));
7020   } else {
7021     const PetscScalar *vv;
7022 
7023     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7024     if (n && idx) {
7025       PetscScalar    *w  = v;
7026       const PetscInt *oi = idx;
7027       PetscInt        j;
7028 
7029       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7030     } else {
7031       PetscCall(PetscArraycpy(v, vv, n));
7032     }
7033     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7034   }
7035   PetscFunctionReturn(PETSC_SUCCESS);
7036 }
7037 
/* Numeric phase for the backend product: refresh the temporary matrices, rerun the
   numeric phase of each intermediate product, then scatter their values into C
   through the COO assembly prepared by MatProductSymbolic_MPIAIJBACKEND() */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o; /* running offsets into coo_v (on-process) and coo_w (off-process send buffer) */

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  mmdata->reusesym = PETSC_FALSE; /* the symbolic-phase values may be reused only for this first numeric call */

  /* compute each intermediate product numerically */
  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* pack values: entries destined to remote ranks go to coo_w, locally owned ones to the front of coo_v */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i + 1] - mmdata->off[i]; /* # of entries of mp[i] to be sent to other processes */

    if (mmdata->mptmp[i]) continue; /* temporary product, consumed by a later intermediate product */
    if (noff) {
      PetscInt nown = mmdata->own[i + 1] - mmdata->own[i]; /* # of entries of mp[i] inserted locally */

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      /* no off-process entries for this product: copy its whole value array */
      Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion: received values land after the locally produced ones in coo_v */
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
  }
  PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7082 
/* Support for Pt * A, A * P, or Pt * A * P.
   The product is assembled from a short series of intermediate sequential products
   of the diag/off-diag blocks of A and P; their entries are then inserted into C
   via COO assembly (MatSetPreallocationCOO here, MatSetValuesCOO in the numeric phase). */
#define MAX_NUMBER_INTERMEDIATE 4
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product           *product = C->product;
  Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ            *a, *p;
  MatMatMPIAIJBACKEND   *mmdata;
  ISLocalToGlobalMapping P_oth_l2g = NULL;
  IS                     glob      = NULL;
  const char            *prefix;
  char                   pprefix[256];
  const PetscInt        *globidx, *P_oth_idx;
  PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
  PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
  PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
                                                                                         /* a base offset; type-2: sparse with a local to global map table */
  const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */

  MatProductType ptype;
  PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
  PetscMPIInt    size;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  ptype = product->type;
  /* with a symmetric A, A^t * B can be computed as the cheaper A * B */
  if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
    ptype                                          = MATPRODUCT_AB;
    product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
  }
  /* determine local/global sizes of C and whether some computed values belong to other processes */
  switch (ptype) {
  case MATPRODUCT_AB:
    A          = product->A;
    P          = product->B;
    m          = A->rmap->n;
    n          = P->cmap->n;
    M          = A->rmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
    break;
  case MATPRODUCT_AtB:
    P          = product->A;
    A          = product->B;
    m          = P->cmap->n;
    n          = A->cmap->n;
    M          = P->cmap->N;
    N          = A->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  case MATPRODUCT_PtAP:
    A          = product->A;
    P          = product->B;
    m          = P->cmap->n;
    n          = P->cmap->n;
    M          = P->cmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
  if (size == 1) hasoffproc = PETSC_FALSE; /* uniprocessor: everything is local */

  /* defaults */
  for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
    mp[i]    = NULL;
    mptmp[i] = PETSC_FALSE;
    rmapt[i] = -1;
    cmapt[i] = -1;
    rmapa[i] = NULL;
    cmapa[i] = NULL;
  }

  /* customization */
  PetscCall(PetscNew(&mmdata));
  mmdata->reusesym = product->api_user; /* with the API interface, the first numeric call may reuse symbolic-phase values */
  if (ptype == MATPRODUCT_AB) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
      PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  } else if (ptype == MATPRODUCT_PtAP) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
      PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  }
  /* set up C (sizes, type, options prefix for the intermediate products) */
  a = (Mat_MPIAIJ *)A->data;
  p = (Mat_MPIAIJ *)P->data;
  PetscCall(MatSetSizes(C, m, n, M, N));
  PetscCall(PetscLayoutSetUp(C->rmap));
  PetscCall(PetscLayoutSetUp(C->cmap));
  PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatGetOptionsPrefix(C, &prefix));

  /* create the intermediate sequential products mp[0..cp), recording for each its
     row/col local-to-global map type (rmapt/cmapt) and map table (rmapa/cmapa) */
  cp = 0;
  switch (ptype) {
  case MATPRODUCT_AB: /* A * P */
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));

    /* A_diag * P_local (merged or not) */
    if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
      /* P is product->B */
      PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
      PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else { /* A_diag * P_diag and A_diag * P_off */
      PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 1;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = p->garray;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }

    /* A_off * P_other */
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;

  case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
    /* A is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
      PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else {
      PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = p->garray;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  case MATPRODUCT_PtAP:
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    /* P is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
    PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
    PetscCall(MatProductSetFill(mp[cp], product->fill));
    PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
    PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
    PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
    mp[cp]->product->api_user = product->api_user;
    PetscCall(MatProductSetFromOptions(mp[cp]));
    PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
    PetscCall(ISGetIndices(glob, &globidx));
    rmapt[cp] = 2;
    rmapa[cp] = globidx;
    cmapt[cp] = 2;
    cmapa[cp] = globidx;
    mptmp[cp] = PETSC_FALSE;
    cp++;
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      mptmp[cp] = PETSC_TRUE; /* A_off * P_oth is only an input to the next product */
      cp++;
      PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  /* sanity check */
  if (size > 1)
    for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);

  /* stash the intermediate products in mmdata */
  PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
  for (i = 0; i < cp; i++) {
    mmdata->mp[i]    = mp[i];
    mmdata->mptmp[i] = mptmp[i];
  }
  mmdata->cp             = cp;
  C->product->data       = mmdata;
  C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
  C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;

  /* memory type */
  mmdata->mtype = PETSC_MEMTYPE_HOST;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
  if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
  else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
  else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;

  /* prepare coo coordinates for values insertion */

  /* count total nonzeros of those intermediate seqaij Mats
    ncoo_d:    # of nonzeros of matrices that do not have offproc entries
    ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
    ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
  */
  for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
    if (mptmp[cp]) continue;
    if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
      const PetscInt *rmap = rmapa[cp];
      const PetscInt  mr   = mp[cp]->rmap->n;
      const PetscInt  rs   = C->rmap->rstart;
      const PetscInt  re   = C->rmap->rend;
      const PetscInt *ii   = mm->i;
      for (i = 0; i < mr; i++) {
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i + 1] - ii[i];
        if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
        else ncoo_oown += nz;                  /* this row is local */
      }
    } else ncoo_d += mm->nz;
  }

  /*
    ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc

    ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.

    off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].

    off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
    own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
    so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.

    coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
    Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
  */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));

  /* gather (i,j) of nonzeros inserted by remote procs */
  if (hasoffproc) {
    PetscSF  msf;
    PetscInt ncoo2, *coo_i2, *coo_j2;

    PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
    PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
    PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */

    for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
      Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
      PetscInt   *idxoff = mmdata->off[cp];
      PetscInt   *idxown = mmdata->own[cp];
      if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
        const PetscInt *rmap = rmapa[cp];
        const PetscInt *cmap = cmapa[cp];
        const PetscInt *ii   = mm->i;
        PetscInt       *coi  = coo_i + ncoo_o;
        PetscInt       *coj  = coo_j + ncoo_o;
        const PetscInt  mr   = mp[cp]->rmap->n;
        const PetscInt  rs   = C->rmap->rstart;
        const PetscInt  re   = C->rmap->rend;
        const PetscInt  cs   = C->cmap->rstart;
        for (i = 0; i < mr; i++) {
          const PetscInt *jj = mm->j + ii[i];
          const PetscInt  gr = rmap[i];
          const PetscInt  nz = ii[i + 1] - ii[i];
          if (gr < rs || gr >= re) { /* this is an offproc row */
            for (j = ii[i]; j < ii[i + 1]; j++) {
              *coi++    = gr;
              *idxoff++ = j;
            }
            if (!cmapt[cp]) { /* already global */
              for (j = 0; j < nz; j++) *coj++ = jj[j];
            } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
              for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
            } else { /* offdiag */
              for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
            }
            ncoo_o += nz;
          } else { /* this is a local row */
            for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
          }
        }
      }
      mmdata->off[cp + 1] = idxoff;
      mmdata->own[cp + 1] = idxown;
    }

    /* build the SF that maps off-process entries to their owners; its multi-SF
       gives the count ncoo2 of entries this process will receive */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
    PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
    PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
    ncoo = ncoo_d + ncoo_oown + ncoo2;
    PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscFree2(coo_i, coo_j));
    /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
    PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
    coo_i = coo_i2;
    coo_j = coo_j2;
  } else { /* no offproc values insertion */
    ncoo = ncoo_d;
    PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));

    /* an empty SF so that PetscSFMalloc/PetscSFFree below still have a valid SF to work with */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(mmdata->sf));
  }
  mmdata->hasoffproc = hasoffproc;

  /* gather (i,j) of nonzeros inserted locally */
  for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
    PetscInt       *coi  = coo_i + ncoo_d;
    PetscInt       *coj  = coo_j + ncoo_d;
    const PetscInt *jj   = mm->j;
    const PetscInt *ii   = mm->i;
    const PetscInt *cmap = cmapa[cp];
    const PetscInt *rmap = rmapa[cp];
    const PetscInt  mr   = mp[cp]->rmap->n;
    const PetscInt  rs   = C->rmap->rstart;
    const PetscInt  re   = C->rmap->rend;
    const PetscInt  cs   = C->cmap->rstart;

    if (mptmp[cp]) continue;
    if (rmapt[cp] == 1) { /* consecutive rows */
      /* fill coo_i */
      for (i = 0; i < mr; i++) {
        const PetscInt gr = i + rs;
        for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
      }
      /* fill coo_j */
      if (!cmapt[cp]) { /* type-0, already global */
        PetscCall(PetscArraycpy(coj, jj, mm->nz));
      } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
        for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
      } else {                                            /* type-2, local to global for sparse columns */
        for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
      }
      ncoo_d += mm->nz;
    } else if (rmapt[cp] == 2) { /* sparse rows */
      for (i = 0; i < mr; i++) {
        const PetscInt *jj = mm->j + ii[i];
        const PetscInt  gr = rmap[i];
        const PetscInt  nz = ii[i + 1] - ii[i];
        if (gr >= rs && gr < re) { /* local rows */
          for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
          if (!cmapt[cp]) { /* type-0, already global */
            for (j = 0; j < nz; j++) *coj++ = jj[j];
          } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
            for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
          } else { /* type-2, local to global for sparse columns */
            for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
          }
          ncoo_d += nz;
        }
      }
    }
  }
  if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
  PetscCall(ISDestroy(&glob));
  if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
  PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
  /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
  PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));

  /* preallocate with COO data */
  PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
  PetscCall(PetscFree2(coo_i, coo_j));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7579 
/* Select the backend symbolic routine for AB/AtB/PtAP products. With device
   support, the backend is used only when A and B share the same type and neither
   is bound to the CPU; command-line options allow forcing the CPU path.
   Falls back to the plain MPIAIJ implementation otherwise. */
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match  = PETSC_FALSE;
  PetscBool usecpu = PETSC_FALSE;
#else
  /* without device support there is nothing to gain from the backend path beyond
     the default; match unconditionally */
  PetscBool match = PETSC_TRUE;
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat, 1);
#if defined(PETSC_HAVE_DEVICE)
  if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
  if (match) { /* we can always fallback to the CPU if requested */
    /* option names differ between the API entry points (MatMatMult etc.) and the
       MatProduct interface */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu;
  }
#endif
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7650 
7651 /*
7652    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7653 
7654    n - the number of block indices in cc[]
7655    cc - the block indices (must be large enough to contain the indices)
7656 */
7657 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7658 {
7659   PetscInt        cnt = -1, nidx, j;
7660   const PetscInt *idx;
7661 
7662   PetscFunctionBegin;
7663   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7664   if (nidx) {
7665     cnt     = 0;
7666     cc[cnt] = idx[0] / bs;
7667     for (j = 1; j < nidx; j++) {
7668       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7669     }
7670   }
7671   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7672   *n = cnt + 1;
7673   PetscFunctionReturn(PETSC_SUCCESS);
7674 }
7675 
7676 /*
7677     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7678 
7679     ncollapsed - the number of block indices
7680     collapsed - the block indices (must be large enough to contain the indices)
7681 */
7682 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7683 {
7684   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7685 
7686   PetscFunctionBegin;
7687   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7688   for (i = start + 1; i < start + bs; i++) {
7689     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7690     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7691     cprevtmp = cprev;
7692     cprev    = merged;
7693     merged   = cprevtmp;
7694   }
7695   *ncollapsed = nprev;
7696   if (collapsed) *collapsed = cprev;
7697   PetscFunctionReturn(PETSC_SUCCESS);
7698 }
7699 
7700 /*
7701  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7702 
7703  Input Parameter:
7704  . Amat - matrix
7705  - symmetrize - make the result symmetric
7706  + scale - scale with diagonal
7707 
7708  Output Parameter:
7709  . a_Gmat - output scalar graph >= 0
7710 
7711 */
7712 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat)
7713 {
7714   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7715   MPI_Comm  comm;
7716   Mat       Gmat;
7717   PetscBool ismpiaij, isseqaij;
7718   Mat       a, b, c;
7719   MatType   jtype;
7720 
7721   PetscFunctionBegin;
7722   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7723   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7724   PetscCall(MatGetSize(Amat, &MM, &NN));
7725   PetscCall(MatGetBlockSize(Amat, &bs));
7726   nloc = (Iend - Istart) / bs;
7727 
7728   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7729   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7730   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7731 
7732   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7733   /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
7734      implementation */
7735   if (bs > 1) {
7736     PetscCall(MatGetType(Amat, &jtype));
7737     PetscCall(MatCreate(comm, &Gmat));
7738     PetscCall(MatSetType(Gmat, jtype));
7739     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7740     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7741     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7742       PetscInt  *d_nnz, *o_nnz;
7743       MatScalar *aa, val, *AA;
7744       PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
7745       if (isseqaij) {
7746         a = Amat;
7747         b = NULL;
7748       } else {
7749         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7750         a             = d->A;
7751         b             = d->B;
7752       }
7753       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7754       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7755       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7756         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
7757         const PetscInt *cols1, *cols2;
7758         for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
7759           PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
7760           nnz[brow / bs] = nc2 / bs;
7761           if (nc2 % bs) ok = 0;
7762           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
7763           for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
7764             PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
7765             if (nc1 != nc2) ok = 0;
7766             else {
7767               for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
7768                 if (cols1[jj] != cols2[jj]) ok = 0;
7769                 if (cols1[jj] % bs != jj % bs) ok = 0;
7770               }
7771             }
7772             PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
7773           }
7774           PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
7775           if (!ok) {
7776             PetscCall(PetscFree2(d_nnz, o_nnz));
7777             PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
7778             goto old_bs;
7779           }
7780         }
7781       }
7782       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7783       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7784       PetscCall(PetscFree2(d_nnz, o_nnz));
7785       PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
7786       // diag
7787       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
7788         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
7789         ai               = aseq->i;
7790         n                = ai[brow + 1] - ai[brow];
7791         aj               = aseq->j + ai[brow];
7792         for (int k = 0; k < n; k += bs) {        // block columns
7793           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
7794           val        = 0;
7795           if (index_size == 0) {
7796             for (int ii = 0; ii < bs; ii++) { // rows in block
7797               aa = aseq->a + ai[brow + ii] + k;
7798               for (int jj = 0; jj < bs; jj++) {         // columns in block
7799                 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7800               }
7801             }
7802           } else {                                       // use (index,index) value if provided
7803             for (int iii = 0; iii < index_size; iii++) { // rows in block
7804               int ii = index[iii];
7805               aa     = aseq->a + ai[brow + ii] + k;
7806               for (int jjj = 0; jjj < index_size; jjj++) { // columns in block
7807                 int jj = index[jjj];
7808                 val    = PetscAbs(PetscRealPart(aa[jj]));
7809               }
7810             }
7811           }
7812           PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7813           AA[k / bs] = val;
7814         }
7815         grow = Istart / bs + brow / bs;
7816         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES));
7817       }
7818       // off-diag
7819       if (ismpiaij) {
7820         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
7821         const PetscScalar *vals;
7822         const PetscInt    *cols, *garray = aij->garray;
7823         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
7824         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
7825           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
7826           for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
7827             PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
7828             AA[k / bs] = 0;
7829             AJ[cidx]   = garray[cols[k]] / bs;
7830           }
7831           nc = ncols / bs;
7832           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
7833           if (index_size == 0) {
7834             for (int ii = 0; ii < bs; ii++) { // rows in block
7835               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7836               for (int k = 0; k < ncols; k += bs) {
7837                 for (int jj = 0; jj < bs; jj++) { // cols in block
7838                   PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7839                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7840                 }
7841               }
7842               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7843             }
7844           } else {                                       // use (index,index) value if provided
7845             for (int iii = 0; iii < index_size; iii++) { // rows in block
7846               int ii = index[iii];
7847               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7848               for (int k = 0; k < ncols; k += bs) {
7849                 for (int jjj = 0; jjj < index_size; jjj++) { // cols in block
7850                   int jj = index[jjj];
7851                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7852                 }
7853               }
7854               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7855             }
7856           }
7857           grow = Istart / bs + brow / bs;
7858           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES));
7859         }
7860       }
7861       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7862       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7863       PetscCall(PetscFree2(AA, AJ));
7864     } else {
7865       const PetscScalar *vals;
7866       const PetscInt    *idx;
7867       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
7868     old_bs:
7869       /*
7870        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7871        */
7872       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
7873       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7874       if (isseqaij) {
7875         PetscInt max_d_nnz;
7876         /*
7877          Determine exact preallocation count for (sequential) scalar matrix
7878          */
7879         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
7880         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7881         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7882         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7883         PetscCall(PetscFree3(w0, w1, w2));
7884       } else if (ismpiaij) {
7885         Mat             Daij, Oaij;
7886         const PetscInt *garray;
7887         PetscInt        max_d_nnz;
7888         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
7889         /*
7890          Determine exact preallocation count for diagonal block portion of scalar matrix
7891          */
7892         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
7893         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7894         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7895         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7896         PetscCall(PetscFree3(w0, w1, w2));
7897         /*
7898          Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
7899          */
7900         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7901           o_nnz[jj] = 0;
7902           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
7903             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7904             o_nnz[jj] += ncols;
7905             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7906           }
7907           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
7908         }
7909       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
7910       /* get scalar copy (norms) of matrix */
7911       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7912       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7913       PetscCall(PetscFree2(d_nnz, o_nnz));
7914       for (Ii = Istart; Ii < Iend; Ii++) {
7915         PetscInt dest_row = Ii / bs;
7916         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
7917         for (jj = 0; jj < ncols; jj++) {
7918           PetscInt    dest_col = idx[jj] / bs;
7919           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
7920           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
7921         }
7922         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
7923       }
7924       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7925       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7926     }
7927   } else {
7928     if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
7929     else {
7930       Gmat = Amat;
7931       PetscCall(PetscObjectReference((PetscObject)Gmat));
7932     }
7933     if (isseqaij) {
7934       a = Gmat;
7935       b = NULL;
7936     } else {
7937       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
7938       a             = d->A;
7939       b             = d->B;
7940     }
7941     if (filter >= 0 || scale) {
7942       /* take absolute value of each entry */
7943       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7944         MatInfo      info;
7945         PetscScalar *avals;
7946         PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
7947         PetscCall(MatSeqAIJGetArray(c, &avals));
7948         for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
7949         PetscCall(MatSeqAIJRestoreArray(c, &avals));
7950       }
7951     }
7952   }
7953   if (symmetrize) {
7954     PetscBool isset, issym;
7955     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
7956     if (!isset || !issym) {
7957       Mat matTrans;
7958       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
7959       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
7960       PetscCall(MatDestroy(&matTrans));
7961     }
7962     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
7963   } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
7964   if (scale) {
7965     /* scale c for all diagonal values = 1 or -1 */
7966     Vec diag;
7967     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
7968     PetscCall(MatGetDiagonal(Gmat, diag));
7969     PetscCall(VecReciprocal(diag));
7970     PetscCall(VecSqrtAbs(diag));
7971     PetscCall(MatDiagonalScale(Gmat, diag, diag));
7972     PetscCall(VecDestroy(&diag));
7973   }
7974   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
7975 
7976   if (filter >= 0) {
7977     PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
7978     PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
7979   }
7980   *a_Gmat = Gmat;
7981   PetscFunctionReturn(PETSC_SUCCESS);
7982 }
7983 
7984 /*
7985     Special version for direct calls from Fortran
7986 */
7987 #include <petsc/private/fortranimpl.h>
7988 
7989 /* Change these macros so can be used in void function */
7990 /* Identical to PetscCallVoid, except it assigns to *_ierr */
7991 #undef PetscCall
7992 #define PetscCall(...) \
7993   do { \
7994     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
7995     if (PetscUnlikely(ierr_msv_mpiaij)) { \
7996       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
7997       return; \
7998     } \
7999   } while (0)
8000 
8001 #undef SETERRQ
8002 #define SETERRQ(comm, ierr, ...) \
8003   do { \
8004     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
8005     return; \
8006   } while (0)
8007 
8008 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8009   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8010 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8011   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8012 #else
8013 #endif
/*
   matsetvaluesmpiaij_ - Fortran-callable specialization of MatSetValues for MPIAIJ matrices.

   Inserts/adds the m x n dense block v into rows im[] and columns in[] (global indices);
   errors are reported through the trailing *_ierr argument via the redefined PetscCall/SETERRQ
   macros above, since a void function cannot return an error code.
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat = *mmat;
  PetscInt    m = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  /* ADD_VALUES and INSERT_VALUES may not be mixed between assemblies */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat         A     = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    PetscBool   ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B                 = aij->B;
    Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue; /* negative rows are ignored by convention */
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* locally owned row: cache search state for both the diagonal (1) and off-diagonal (2) parts */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* column in the diagonal block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue; /* negative columns are ignored by convention */
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* column in the off-diagonal block: translate global column to local via colmap */
            if (mat->was_assembled) {
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
                /* column not present in the assembled pattern: disassemble so new entries can be added */
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B        = aij->B;
                b        = (Mat_SeqAIJ *)B->data;
                bimax    = b->imax;
                bi       = b->i;
                bilen    = b->ilen;
                bj       = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* off-process row: stash for communication during assembly */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}
8127 
/* Undefining these here since they were redefined from their original definition above! No
 * other PETSc functions should be defined past this point, as it is impossible to recover the
 * original definitions (the redefined versions report errors through *_ierr and return void) */
#undef PetscCall
#undef SETERRQ
8133