xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 0337bfe0b9dcc77abc5d44df0b7f57cdcdf2ff74)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
10 #define TYPE AIJ
11 #define TYPE_AIJ
12 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
13 #undef TYPE
14 #undef TYPE_AIJ
15 
16 static PetscErrorCode MatReset_MPIAIJ(Mat mat)
17 {
18   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
19 
20   PetscFunctionBegin;
21   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
22   PetscCall(MatStashDestroy_Private(&mat->stash));
23   PetscCall(VecDestroy(&aij->diag));
24   PetscCall(MatDestroy(&aij->A));
25   PetscCall(MatDestroy(&aij->B));
26 #if defined(PETSC_USE_CTABLE)
27   PetscCall(PetscHMapIDestroy(&aij->colmap));
28 #else
29   PetscCall(PetscFree(aij->colmap));
30 #endif
31   PetscCall(PetscFree(aij->garray));
32   PetscCall(VecDestroy(&aij->lvec));
33   PetscCall(VecScatterDestroy(&aij->Mvctx));
34   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
35   PetscCall(PetscFree(aij->ld));
36   PetscFunctionReturn(PETSC_SUCCESS);
37 }
38 
39 static PetscErrorCode MatResetHash_MPIAIJ(Mat mat)
40 {
41   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
42   /* Save the nonzero states of the component matrices because those are what are used to determine
43     the nonzero state of mat */
44   PetscObjectState Astate = aij->A->nonzerostate, Bstate = aij->B->nonzerostate;
45 
46   PetscFunctionBegin;
47   PetscCall(MatReset_MPIAIJ(mat));
48   PetscCall(MatSetUp_MPI_Hash(mat));
49   aij->A->nonzerostate = ++Astate, aij->B->nonzerostate = ++Bstate;
50   PetscFunctionReturn(PETSC_SUCCESS);
51 }
52 
53 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
54 {
55   PetscFunctionBegin;
56   PetscCall(MatReset_MPIAIJ(mat));
57 
58   PetscCall(PetscFree(mat->data));
59 
60   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
61   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
62 
63   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
64   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
65   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
66   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
69   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetHash_C", NULL));
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
71   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
73   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
74 #if defined(PETSC_HAVE_CUDA)
75   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
76 #endif
77 #if defined(PETSC_HAVE_HIP)
78   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
79 #endif
80 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
81   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
82 #endif
83   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
84 #if defined(PETSC_HAVE_ELEMENTAL)
85   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
86 #endif
87 #if defined(PETSC_HAVE_SCALAPACK)
88   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
89 #endif
90 #if defined(PETSC_HAVE_HYPRE)
91   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
92   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
93 #endif
94   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
95   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
96   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
97   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
98   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
99   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
100 #if defined(PETSC_HAVE_MKL_SPARSE)
101   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
102 #endif
103   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
104   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
105   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
106   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
107   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
108   PetscFunctionReturn(PETSC_SUCCESS);
109 }
110 
111 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
112 {
113   Mat B;
114 
115   PetscFunctionBegin;
116   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
117   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
118   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
119   PetscCall(MatDestroy(&B));
120   PetscFunctionReturn(PETSC_SUCCESS);
121 }
122 
123 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
124 {
125   Mat B;
126 
127   PetscFunctionBegin;
128   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
129   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
130   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
131   PetscFunctionReturn(PETSC_SUCCESS);
132 }
133 
134 /*MC
135    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
136 
137    This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
138    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
139   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
140   for communicators controlling multiple processes.  It is recommended that you call both of
141   the above preallocation routines for simplicity.
142 
143    Options Database Key:
144 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
145 
146   Level: beginner
147 
148   Developer Note:
149   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`. The type also automatically switches over to use inodes when
150   enough exist.
151 
152 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
153 M*/
154 
155 /*MC
156    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
157 
158    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
159    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
160    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
161   for communicators controlling multiple processes.  It is recommended that you call both of
162   the above preallocation routines for simplicity.
163 
164    Options Database Key:
165 . -mat_type aijcrl - sets the matrix type to `MATAIJCRL` during a call to `MatSetFromOptions()`
166 
167   Level: beginner
168 
169 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
170 M*/
171 
172 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
173 {
174   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
175 
176   PetscFunctionBegin;
177 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
178   A->boundtocpu = flg;
179 #endif
180   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
181   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
182 
183   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
184    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
185    * to differ from the parent matrix. */
186   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
187   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
188   PetscFunctionReturn(PETSC_SUCCESS);
189 }
190 
191 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
192 {
193   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
194 
195   PetscFunctionBegin;
196   if (mat->A) {
197     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
198     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
199   }
200   PetscFunctionReturn(PETSC_SUCCESS);
201 }
202 
203 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
204 {
205   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
206   Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
207   Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
208   const PetscInt  *ia, *ib;
209   const MatScalar *aa, *bb, *aav, *bav;
210   PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
211   PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;
212 
213   PetscFunctionBegin;
214   *keptrows = NULL;
215 
216   ia = a->i;
217   ib = b->i;
218   PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
219   PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
220   for (i = 0; i < m; i++) {
221     na = ia[i + 1] - ia[i];
222     nb = ib[i + 1] - ib[i];
223     if (!na && !nb) {
224       cnt++;
225       goto ok1;
226     }
227     aa = aav + ia[i];
228     for (j = 0; j < na; j++) {
229       if (aa[j] != 0.0) goto ok1;
230     }
231     bb = PetscSafePointerPlusOffset(bav, ib[i]);
232     for (j = 0; j < nb; j++) {
233       if (bb[j] != 0.0) goto ok1;
234     }
235     cnt++;
236   ok1:;
237   }
238   PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
239   if (!n0rows) {
240     PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
241     PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
242     PetscFunctionReturn(PETSC_SUCCESS);
243   }
244   PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
245   cnt = 0;
246   for (i = 0; i < m; i++) {
247     na = ia[i + 1] - ia[i];
248     nb = ib[i + 1] - ib[i];
249     if (!na && !nb) continue;
250     aa = aav + ia[i];
251     for (j = 0; j < na; j++) {
252       if (aa[j] != 0.0) {
253         rows[cnt++] = rstart + i;
254         goto ok2;
255       }
256     }
257     bb = PetscSafePointerPlusOffset(bav, ib[i]);
258     for (j = 0; j < nb; j++) {
259       if (bb[j] != 0.0) {
260         rows[cnt++] = rstart + i;
261         goto ok2;
262       }
263     }
264   ok2:;
265   }
266   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
267   PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
268   PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
269   PetscFunctionReturn(PETSC_SUCCESS);
270 }
271 
272 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
273 {
274   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
275   PetscBool   cong;
276 
277   PetscFunctionBegin;
278   PetscCall(MatHasCongruentLayouts(Y, &cong));
279   if (Y->assembled && cong) {
280     PetscCall(MatDiagonalSet(aij->A, D, is));
281   } else {
282     PetscCall(MatDiagonalSet_Default(Y, D, is));
283   }
284   PetscFunctionReturn(PETSC_SUCCESS);
285 }
286 
287 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
288 {
289   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
290   PetscInt    i, rstart, nrows, *rows;
291 
292   PetscFunctionBegin;
293   *zrows = NULL;
294   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
295   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
296   for (i = 0; i < nrows; i++) rows[i] += rstart;
297   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
298   PetscFunctionReturn(PETSC_SUCCESS);
299 }
300 
301 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
302 {
303   Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
304   PetscInt           i, m, n, *garray = aij->garray;
305   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
306   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
307   PetscReal         *work;
308   const PetscScalar *dummy;
309 
310   PetscFunctionBegin;
311   PetscCall(MatGetSize(A, &m, &n));
312   PetscCall(PetscCalloc1(n, &work));
313   PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
314   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
315   PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
316   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
317   if (type == NORM_2) {
318     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
319     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
320   } else if (type == NORM_1) {
321     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
322     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
323   } else if (type == NORM_INFINITY) {
324     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
325     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
326   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
327     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
328     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
329   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
330     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
331     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
332   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
333   if (type == NORM_INFINITY) {
334     PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
335   } else {
336     PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
337   }
338   PetscCall(PetscFree(work));
339   if (type == NORM_2) {
340     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
341   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
342     for (i = 0; i < n; i++) reductions[i] /= m;
343   }
344   PetscFunctionReturn(PETSC_SUCCESS);
345 }
346 
347 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
348 {
349   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
350   IS              sis, gis;
351   const PetscInt *isis, *igis;
352   PetscInt        n, *iis, nsis, ngis, rstart, i;
353 
354   PetscFunctionBegin;
355   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
356   PetscCall(MatFindNonzeroRows(a->B, &gis));
357   PetscCall(ISGetSize(gis, &ngis));
358   PetscCall(ISGetSize(sis, &nsis));
359   PetscCall(ISGetIndices(sis, &isis));
360   PetscCall(ISGetIndices(gis, &igis));
361 
362   PetscCall(PetscMalloc1(ngis + nsis, &iis));
363   PetscCall(PetscArraycpy(iis, igis, ngis));
364   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
365   n = ngis + nsis;
366   PetscCall(PetscSortRemoveDupsInt(&n, iis));
367   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
368   for (i = 0; i < n; i++) iis[i] += rstart;
369   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
370 
371   PetscCall(ISRestoreIndices(sis, &isis));
372   PetscCall(ISRestoreIndices(gis, &igis));
373   PetscCall(ISDestroy(&sis));
374   PetscCall(ISDestroy(&gis));
375   PetscFunctionReturn(PETSC_SUCCESS);
376 }
377 
378 /*
379   Local utility routine that creates a mapping from the global column
380 number to the local number in the off-diagonal part of the local
381 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
382 a slightly higher hash table cost; without it it is not scalable (each processor
383 has an order N integer array) but is fast to access.
384 */
385 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
386 {
387   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
388   PetscInt    n   = aij->B->cmap->n, i;
389 
390   PetscFunctionBegin;
391   PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
392 #if defined(PETSC_USE_CTABLE)
393   PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
394   for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
395 #else
396   PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
397   for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
398 #endif
399   PetscFunctionReturn(PETSC_SUCCESS);
400 }
401 
/*
  MatSetValues_SeqAIJ_A_Private - Inserts or adds `value` at local position (row, col) of
  the matrix referred to by the caller's variables A/a.

  The macro does not declare anything: it reads and updates variables of the expanding
  function, namely rp1 (column indices of the row), ap1 (values of the row), nrow1 (current
  row length), rmax1, low1/high1 (search window), lastcol1, t, _i, N, nonew,
  ignorezeroentries, ailen, and the A, a, am, aa, ai, aj, aimax arguments handed to
  MatSeqXAIJReallocateAIJ().

  Steps:
  - the search window is narrowed using lastcol1, bisected until at most 5 candidates
    remain, then scanned linearly;
  - if col is already present, value is added (ADD_VALUES) or overwrites the entry;
  - otherwise the entry is skipped when value is 0.0, ignorezeroentries is set and
    row != col, or when nonew == 1; nonew == -1 raises PETSC_ERR_ARG_OUTOFRANGE;
  - otherwise MatSeqXAIJReallocateAIJ() is invoked (presumably to make room - TODO confirm),
    the later entries of the row are shifted up by one and the new entry is stored at _i;
  - ailen[row] is always updated to nrow1 at the end.

  orow and ocol are only used in the error message. Because the macro defines the label
  a_noinsert, it can be expanded at most once per function.
*/
402 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
403   do { \
404     if (col <= lastcol1) low1 = 0; \
405     else high1 = nrow1; \
406     lastcol1 = col; \
407     while (high1 - low1 > 5) { \
408       t = (low1 + high1) / 2; \
409       if (rp1[t] > col) high1 = t; \
410       else low1 = t; \
411     } \
412     for (_i = low1; _i < high1; _i++) { \
413       if (rp1[_i] > col) break; \
414       if (rp1[_i] == col) { \
415         if (addv == ADD_VALUES) { \
416           ap1[_i] += value; \
417           /* Not sure whether LogFlops will slow down the code or not */ \
418           (void)PetscLogFlops(1.0); \
419         } else ap1[_i] = value; \
420         goto a_noinsert; \
421       } \
422     } \
423     if (value == 0.0 && ignorezeroentries && row != col) { \
424       low1  = 0; \
425       high1 = nrow1; \
426       goto a_noinsert; \
427     } \
428     if (nonew == 1) { \
429       low1  = 0; \
430       high1 = nrow1; \
431       goto a_noinsert; \
432     } \
433     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
434     MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
435     N = nrow1++ - 1; \
436     a->nz++; \
437     high1++; \
438     /* shift up all the later entries in this row */ \
439     PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
440     PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
441     rp1[_i] = col; \
442     ap1[_i] = value; \
443   a_noinsert:; \
444     ailen[row] = nrow1; \
445   } while (0)
446 
/*
  MatSetValues_SeqAIJ_B_Private - Inserts or adds `value` at position (row, col) of the
  matrix referred to by the caller's variables B/b.

  Like its _A_ counterpart the macro declares nothing and works on variables of the
  expanding function: rp2, ap2, nrow2, rmax2, low2/high2, lastcol2, t, _i, N, nonew,
  ignorezeroentries, bilen, and the B, b, bm, ba, bi, bj, bimax arguments handed to
  MatSeqXAIJReallocateAIJ().

  The search (window narrowing with lastcol2, bisection down to at most 5 candidates, linear
  scan), the add/overwrite of an existing entry and the shift-and-insert of a new one follow
  the same steps as the _A_ macro. The one difference in the skip rules is that a 0.0 value
  is dropped whenever ignorezeroentries is set; there is no row != col exception here.

  orow and ocol are only used in the error message. Because the macro defines the label
  b_noinsert, it can be expanded at most once per function.
*/
447 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
448   do { \
449     if (col <= lastcol2) low2 = 0; \
450     else high2 = nrow2; \
451     lastcol2 = col; \
452     while (high2 - low2 > 5) { \
453       t = (low2 + high2) / 2; \
454       if (rp2[t] > col) high2 = t; \
455       else low2 = t; \
456     } \
457     for (_i = low2; _i < high2; _i++) { \
458       if (rp2[_i] > col) break; \
459       if (rp2[_i] == col) { \
460         if (addv == ADD_VALUES) { \
461           ap2[_i] += value; \
462           (void)PetscLogFlops(1.0); \
463         } else ap2[_i] = value; \
464         goto b_noinsert; \
465       } \
466     } \
467     if (value == 0.0 && ignorezeroentries) { \
468       low2  = 0; \
469       high2 = nrow2; \
470       goto b_noinsert; \
471     } \
472     if (nonew == 1) { \
473       low2  = 0; \
474       high2 = nrow2; \
475       goto b_noinsert; \
476     } \
477     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
478     MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
479     N = nrow2++ - 1; \
480     b->nz++; \
481     high2++; \
482     /* shift up all the later entries in this row */ \
483     PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
484     PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
485     rp2[_i] = col; \
486     ap2[_i] = value; \
487   b_noinsert:; \
488     bilen[row] = nrow2; \
489   } while (0)
490 
491 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
492 {
493   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
494   Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
495   PetscInt     l, *garray                         = mat->garray, diag;
496   PetscScalar *aa, *ba;
497 
498   PetscFunctionBegin;
499   /* code only works for square matrices A */
500 
501   /* find size of row to the left of the diagonal part */
502   PetscCall(MatGetOwnershipRange(A, &diag, NULL));
503   row = row - diag;
504   for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
505     if (garray[b->j[b->i[row] + l]] > diag) break;
506   }
507   if (l) {
508     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
509     PetscCall(PetscArraycpy(ba + b->i[row], v, l));
510     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
511   }
512 
513   /* diagonal part */
514   if (a->i[row + 1] - a->i[row]) {
515     PetscCall(MatSeqAIJGetArray(mat->A, &aa));
516     PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row]));
517     PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
518   }
519 
520   /* right of diagonal part */
521   if (b->i[row + 1] - b->i[row] - l) {
522     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
523     PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
524     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
525   }
526   PetscFunctionReturn(PETSC_SUCCESS);
527 }
528 
/*
  MatSetValues_MPIAIJ - Inserts or adds an m-by-n block of values, addressed by the global
  row indices im[] and the global column indices in[], into an MPIAIJ matrix.

  Rows owned by this process (rstart <= im[i] < rend) are written directly: columns in
  [cstart, cend) go into aij->A through MatSetValues_SeqAIJ_A_Private(), all other columns
  go into aij->B through MatSetValues_SeqAIJ_B_Private(). Rows owned elsewhere are put into
  mat->stash unless aij->donotstash is set; they raise an error when mat->nooffprocentries
  is set.

  Negative row or column indices are skipped. Rows and off-process-block columns beyond the
  global size raise PETSC_ERR_ARG_OUTOFRANGE.

  v is read as v[i * n + j] when aij->roworiented is true and as v[i + j * m] otherwise.
  When v is NULL, `value` keeps its initial 0.0.
*/
529 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
530 {
531   Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
532   PetscScalar value = 0.0;
533   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
534   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
535   PetscBool   roworiented = aij->roworiented;
536 
537   /* Some Variables required in the macro */
538   Mat         A     = aij->A;
539   Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
540   PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
541   PetscBool   ignorezeroentries = a->ignorezeroentries;
542   Mat         B                 = aij->B;
543   Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
544   PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
545   MatScalar  *aa, *ba;
546   PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
547   PetscInt    nonew;
548   MatScalar  *ap1, *ap2;
549 
550   PetscFunctionBegin;
551   PetscCall(MatSeqAIJGetArray(A, &aa));
552   PetscCall(MatSeqAIJGetArray(B, &ba));
553   for (i = 0; i < m; i++) {
554     if (im[i] < 0) continue;
555     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
556     if (im[i] >= rstart && im[i] < rend) {
      /* Locally owned row: set up the per-row state both insertion macros work on
         (index/value pointers of the row, row capacity, current length, search window) */
557       row      = im[i] - rstart;
558       lastcol1 = -1;
559       rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
560       ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
561       rmax1    = aimax[row];
562       nrow1    = ailen[row];
563       low1     = 0;
564       high1    = nrow1;
565       lastcol2 = -1;
566       rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
567       ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
568       rmax2    = bimax[row];
569       nrow2    = bilen[row];
570       low2     = 0;
571       high2    = nrow2;
572 
573       for (j = 0; j < n; j++) {
574         if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        /* with ignorezeroentries, adding 0.0 anywhere but at im[i] == in[j] is skipped outright */
575         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
576         if (in[j] >= cstart && in[j] < cend) {
          /* column in [cstart, cend): goes into A with a column index relative to cstart */
577           col   = in[j] - cstart;
578           nonew = a->nonew;
579           MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
580         } else if (in[j] < 0) {
581           continue;
582         } else {
583           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
584           if (mat->was_assembled) {
            /* the global column is translated through colmap, which is created on first use;
               colmap holds local index + 1, so a missing column ends up as col == -1 */
585             if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
586 #if defined(PETSC_USE_CTABLE)
587             PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
588             col--;
589 #else
590             col = aij->colmap[in[j]] - 1;
591 #endif
592             if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
593               PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));  /* Change aij->B from reduced/local format to expanded/global format */
594               col = in[j];
595               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
596               B     = aij->B;
597               b     = (Mat_SeqAIJ *)B->data;
598               bimax = b->imax;
599               bi    = b->i;
600               bilen = b->ilen;
601               bj    = b->j;
602               ba    = b->a;
603               rp2   = PetscSafePointerPlusOffset(bj, bi[row]);
604               ap2   = PetscSafePointerPlusOffset(ba, bi[row]);
605               rmax2 = bimax[row];
606               nrow2 = bilen[row];
607               low2  = 0;
608               high2 = nrow2;
609               bm    = aij->B->rmap->n;
610               ba    = b->a;
611             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              /* new column while B's nonew is nonzero: logged when nonew == 1, an error otherwise */
612               if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) {
613                 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
614               } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
615             }
          /* matrix not assembled before: the global column index is used as is */
616           } else col = in[j];
617           nonew = b->nonew;
618           MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
619         }
620       }
621     } else {
      /* row not owned by this process: stash it unless stashing is turned off */
622       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
623       if (!aij->donotstash) {
624         mat->assembled = PETSC_FALSE;
625         if (roworiented) {
626           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
627         } else {
628           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
629         }
630       }
631     }
632   }
633   PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, ba might have been free'd due to reallocation above. But we don't access them here */
634   PetscCall(MatSeqAIJRestoreArray(B, &ba));
635   PetscFunctionReturn(PETSC_SUCCESS);
636 }
637 
638 /*
639     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
640     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
641     No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
642 */
643 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
644 {
645   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
646   Mat         A      = aij->A; /* diagonal part of the matrix */
647   Mat         B      = aij->B; /* off-diagonal part of the matrix */
648   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
649   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
650   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
651   PetscInt   *ailen = a->ilen, *aj = a->j;
652   PetscInt   *bilen = b->ilen, *bj = b->j;
653   PetscInt    am          = aij->A->rmap->n, j;
654   PetscInt    diag_so_far = 0, dnz;
655   PetscInt    offd_so_far = 0, onz;
656 
657   PetscFunctionBegin;
658   /* Iterate over all rows of the matrix */
659   for (j = 0; j < am; j++) {
660     dnz = onz = 0;
661     /*  Iterate over all non-zero columns of the current row */
662     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
663       /* If column is in the diagonal */
664       if (mat_j[col] >= cstart && mat_j[col] < cend) {
665         aj[diag_so_far++] = mat_j[col] - cstart;
666         dnz++;
667       } else { /* off-diagonal entries */
668         bj[offd_so_far++] = mat_j[col];
669         onz++;
670       }
671     }
672     ailen[j] = dnz;
673     bilen[j] = onz;
674   }
675   PetscFunctionReturn(PETSC_SUCCESS);
676 }
677 
678 /*
679     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
680     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
681     No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
682     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
683     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
684 */
685 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
686 {
687   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
688   Mat          A    = aij->A; /* diagonal part of the matrix */
689   Mat          B    = aij->B; /* off-diagonal part of the matrix */
690   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
691   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
692   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
693   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
694   PetscInt    *ailen = a->ilen, *aj = a->j;
695   PetscInt    *bilen = b->ilen, *bj = b->j;
696   PetscInt     am          = aij->A->rmap->n, j;
697   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
698   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
699   PetscScalar *aa = a->a, *ba = b->a;
700 
701   PetscFunctionBegin;
702   /* Iterate over all rows of the matrix */
703   for (j = 0; j < am; j++) {
704     dnz_row = onz_row = 0;
705     rowstart_offd     = full_offd_i[j];
706     rowstart_diag     = full_diag_i[j];
707     /*  Iterate over all non-zero columns of the current row */
708     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
709       /* If column is in the diagonal */
710       if (mat_j[col] >= cstart && mat_j[col] < cend) {
711         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
712         aa[rowstart_diag + dnz_row] = mat_a[col];
713         dnz_row++;
714       } else { /* off-diagonal entries */
715         bj[rowstart_offd + onz_row] = mat_j[col];
716         ba[rowstart_offd + onz_row] = mat_a[col];
717         onz_row++;
718       }
719     }
720     ailen[j] = dnz_row;
721     bilen[j] = onz_row;
722   }
723   PetscFunctionReturn(PETSC_SUCCESS);
724 }
725 
726 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
727 {
728   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
729   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
730   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
731 
732   PetscFunctionBegin;
733   for (i = 0; i < m; i++) {
734     if (idxm[i] < 0) continue; /* negative row */
735     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
736     PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
737     row = idxm[i] - rstart;
738     for (j = 0; j < n; j++) {
739       if (idxn[j] < 0) continue; /* negative column */
740       PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
741       if (idxn[j] >= cstart && idxn[j] < cend) {
742         col = idxn[j] - cstart;
743         PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
744       } else {
745         if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
746 #if defined(PETSC_USE_CTABLE)
747         PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
748         col--;
749 #else
750         col = aij->colmap[idxn[j]] - 1;
751 #endif
752         if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
753         else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
754       }
755     }
756   }
757   PetscFunctionReturn(PETSC_SUCCESS);
758 }
759 
760 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
761 {
762   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
763   PetscInt    nstash, reallocs;
764 
765   PetscFunctionBegin;
766   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
767 
768   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
769   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
770   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
771   PetscFunctionReturn(PETSC_SUCCESS);
772 }
773 
774 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
775 {
776   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
777   PetscMPIInt  n;
778   PetscInt     i, j, rstart, ncols, flg;
779   PetscInt    *row, *col;
780   PetscBool    other_disassembled;
781   PetscScalar *val;
782 
783   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
784 
785   PetscFunctionBegin;
786   if (!aij->donotstash && !mat->nooffprocentries) {
787     while (1) {
788       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
789       if (!flg) break;
790 
791       for (i = 0; i < n;) {
792         /* Now identify the consecutive vals belonging to the same row */
793         for (j = i, rstart = row[j]; j < n; j++) {
794           if (row[j] != rstart) break;
795         }
796         if (j < n) ncols = j - i;
797         else ncols = n - i;
798         /* Now assemble all these values with a single function call */
799         PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
800         i = j;
801       }
802     }
803     PetscCall(MatStashScatterEnd_Private(&mat->stash));
804   }
805 #if defined(PETSC_HAVE_DEVICE)
806   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
807   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
808   if (mat->boundtocpu) {
809     PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
810     PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
811   }
812 #endif
813   PetscCall(MatAssemblyBegin(aij->A, mode));
814   PetscCall(MatAssemblyEnd(aij->A, mode));
815 
816   /* determine if any processor has disassembled, if so we must
817      also disassemble ourself, in order that we may reassemble. */
818   /*
819      if nonzero structure of submatrix B cannot change then we know that
820      no processor disassembled thus we can skip this stuff
821   */
822   if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
823     PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
824     if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
825       PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
826     }
827   }
828   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
829   PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
830 #if defined(PETSC_HAVE_DEVICE)
831   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
832 #endif
833   PetscCall(MatAssemblyBegin(aij->B, mode));
834   PetscCall(MatAssemblyEnd(aij->B, mode));
835 
836   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
837 
838   aij->rowvalues = NULL;
839 
840   PetscCall(VecDestroy(&aij->diag));
841 
842   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
843   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
844     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
845     PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
846   }
847 #if defined(PETSC_HAVE_DEVICE)
848   mat->offloadmask = PETSC_OFFLOAD_BOTH;
849 #endif
850   PetscFunctionReturn(PETSC_SUCCESS);
851 }
852 
853 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
854 {
855   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
856 
857   PetscFunctionBegin;
858   PetscCall(MatZeroEntries(l->A));
859   PetscCall(MatZeroEntries(l->B));
860   PetscFunctionReturn(PETSC_SUCCESS);
861 }
862 
863 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
864 {
865   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
866   PetscInt   *lrows;
867   PetscInt    r, len;
868   PetscBool   cong;
869 
870   PetscFunctionBegin;
871   /* get locally owned rows */
872   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
873   PetscCall(MatHasCongruentLayouts(A, &cong));
874   /* fix right-hand side if needed */
875   if (x && b) {
876     const PetscScalar *xx;
877     PetscScalar       *bb;
878 
879     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
880     PetscCall(VecGetArrayRead(x, &xx));
881     PetscCall(VecGetArray(b, &bb));
882     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
883     PetscCall(VecRestoreArrayRead(x, &xx));
884     PetscCall(VecRestoreArray(b, &bb));
885   }
886 
887   if (diag != 0.0 && cong) {
888     PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
889     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
890   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
891     Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
892     Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
893     PetscInt    nnwA, nnwB;
894     PetscBool   nnzA, nnzB;
895 
896     nnwA = aijA->nonew;
897     nnwB = aijB->nonew;
898     nnzA = aijA->keepnonzeropattern;
899     nnzB = aijB->keepnonzeropattern;
900     if (!nnzA) {
901       PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
902       aijA->nonew = 0;
903     }
904     if (!nnzB) {
905       PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
906       aijB->nonew = 0;
907     }
908     /* Must zero here before the next loop */
909     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
910     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
911     for (r = 0; r < len; ++r) {
912       const PetscInt row = lrows[r] + A->rmap->rstart;
913       if (row >= A->cmap->N) continue;
914       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
915     }
916     aijA->nonew = nnwA;
917     aijB->nonew = nnwB;
918   } else {
919     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
920     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
921   }
922   PetscCall(PetscFree(lrows));
923   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
924   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
925 
926   /* only change matrix nonzero state if pattern was allowed to be changed */
927   if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) {
928     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
929     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
930   }
931   PetscFunctionReturn(PETSC_SUCCESS);
932 }
933 
934 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
935 {
936   Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
937   PetscInt           n = A->rmap->n;
938   PetscInt           i, j, r, m, len = 0;
939   PetscInt          *lrows, *owners = A->rmap->range;
940   PetscMPIInt        p = 0;
941   PetscSFNode       *rrows;
942   PetscSF            sf;
943   const PetscScalar *xx;
944   PetscScalar       *bb, *mask, *aij_a;
945   Vec                xmask, lmask;
946   Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
947   const PetscInt    *aj, *ii, *ridx;
948   PetscScalar       *aa;
949 
950   PetscFunctionBegin;
951   /* Create SF where leaves are input rows and roots are owned rows */
952   PetscCall(PetscMalloc1(n, &lrows));
953   for (r = 0; r < n; ++r) lrows[r] = -1;
954   PetscCall(PetscMalloc1(N, &rrows));
955   for (r = 0; r < N; ++r) {
956     const PetscInt idx = rows[r];
957     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
958     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
959       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
960     }
961     rrows[r].rank  = p;
962     rrows[r].index = rows[r] - owners[p];
963   }
964   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
965   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
966   /* Collect flags for rows to be zeroed */
967   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
968   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
969   PetscCall(PetscSFDestroy(&sf));
970   /* Compress and put in row numbers */
971   for (r = 0; r < n; ++r)
972     if (lrows[r] >= 0) lrows[len++] = r;
973   /* zero diagonal part of matrix */
974   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
975   /* handle off-diagonal part of matrix */
976   PetscCall(MatCreateVecs(A, &xmask, NULL));
977   PetscCall(VecDuplicate(l->lvec, &lmask));
978   PetscCall(VecGetArray(xmask, &bb));
979   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
980   PetscCall(VecRestoreArray(xmask, &bb));
981   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
982   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
983   PetscCall(VecDestroy(&xmask));
984   if (x && b) { /* this code is buggy when the row and column layout don't match */
985     PetscBool cong;
986 
987     PetscCall(MatHasCongruentLayouts(A, &cong));
988     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
989     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
990     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
991     PetscCall(VecGetArrayRead(l->lvec, &xx));
992     PetscCall(VecGetArray(b, &bb));
993   }
994   PetscCall(VecGetArray(lmask, &mask));
995   /* remove zeroed rows of off-diagonal matrix */
996   PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
997   ii = aij->i;
998   for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
999   /* loop over all elements of off process part of matrix zeroing removed columns*/
1000   if (aij->compressedrow.use) {
1001     m    = aij->compressedrow.nrows;
1002     ii   = aij->compressedrow.i;
1003     ridx = aij->compressedrow.rindex;
1004     for (i = 0; i < m; i++) {
1005       n  = ii[i + 1] - ii[i];
1006       aj = aij->j + ii[i];
1007       aa = aij_a + ii[i];
1008 
1009       for (j = 0; j < n; j++) {
1010         if (PetscAbsScalar(mask[*aj])) {
1011           if (b) bb[*ridx] -= *aa * xx[*aj];
1012           *aa = 0.0;
1013         }
1014         aa++;
1015         aj++;
1016       }
1017       ridx++;
1018     }
1019   } else { /* do not use compressed row format */
1020     m = l->B->rmap->n;
1021     for (i = 0; i < m; i++) {
1022       n  = ii[i + 1] - ii[i];
1023       aj = aij->j + ii[i];
1024       aa = aij_a + ii[i];
1025       for (j = 0; j < n; j++) {
1026         if (PetscAbsScalar(mask[*aj])) {
1027           if (b) bb[i] -= *aa * xx[*aj];
1028           *aa = 0.0;
1029         }
1030         aa++;
1031         aj++;
1032       }
1033     }
1034   }
1035   if (x && b) {
1036     PetscCall(VecRestoreArray(b, &bb));
1037     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1038   }
1039   PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
1040   PetscCall(VecRestoreArray(lmask, &mask));
1041   PetscCall(VecDestroy(&lmask));
1042   PetscCall(PetscFree(lrows));
1043 
1044   /* only change matrix nonzero state if pattern was allowed to be changed */
1045   if (!((Mat_SeqAIJ *)l->A->data)->nonew) {
1046     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1047     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1048   }
1049   PetscFunctionReturn(PETSC_SUCCESS);
1050 }
1051 
1052 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1053 {
1054   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1055   PetscInt    nt;
1056   VecScatter  Mvctx = a->Mvctx;
1057 
1058   PetscFunctionBegin;
1059   PetscCall(VecGetLocalSize(xx, &nt));
1060   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
1061   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1062   PetscUseTypeMethod(a->A, mult, xx, yy);
1063   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1064   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1065   PetscFunctionReturn(PETSC_SUCCESS);
1066 }
1067 
1068 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1069 {
1070   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1071 
1072   PetscFunctionBegin;
1073   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1074   PetscFunctionReturn(PETSC_SUCCESS);
1075 }
1076 
1077 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1078 {
1079   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1080   VecScatter  Mvctx = a->Mvctx;
1081 
1082   PetscFunctionBegin;
1083   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1084   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1085   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1086   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1087   PetscFunctionReturn(PETSC_SUCCESS);
1088 }
1089 
1090 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1091 {
1092   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1093 
1094   PetscFunctionBegin;
1095   /* do nondiagonal part */
1096   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1097   /* do local part */
1098   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1099   /* add partial results together */
1100   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1101   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1102   PetscFunctionReturn(PETSC_SUCCESS);
1103 }
1104 
1105 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
1106 {
1107   MPI_Comm    comm;
1108   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
1109   Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
1110   IS          Me, Notme;
1111   PetscInt    M, N, first, last, *notme, i;
1112   PetscBool   lf;
1113   PetscMPIInt size;
1114 
1115   PetscFunctionBegin;
1116   /* Easy test: symmetric diagonal block */
1117   PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
1118   PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
1119   if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
1120   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
1121   PetscCallMPI(MPI_Comm_size(comm, &size));
1122   if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);
1123 
1124   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1125   PetscCall(MatGetSize(Amat, &M, &N));
1126   PetscCall(MatGetOwnershipRange(Amat, &first, &last));
1127   PetscCall(PetscMalloc1(N - last + first, &notme));
1128   for (i = 0; i < first; i++) notme[i] = i;
1129   for (i = last; i < M; i++) notme[i - last + first] = i;
1130   PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
1131   PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
1132   PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
1133   Aoff = Aoffs[0];
1134   PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
1135   Boff = Boffs[0];
1136   PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
1137   PetscCall(MatDestroyMatrices(1, &Aoffs));
1138   PetscCall(MatDestroyMatrices(1, &Boffs));
1139   PetscCall(ISDestroy(&Me));
1140   PetscCall(ISDestroy(&Notme));
1141   PetscCall(PetscFree(notme));
1142   PetscFunctionReturn(PETSC_SUCCESS);
1143 }
1144 
1145 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1146 {
1147   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1148 
1149   PetscFunctionBegin;
1150   /* do nondiagonal part */
1151   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1152   /* do local part */
1153   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1154   /* add partial results together */
1155   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1156   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1157   PetscFunctionReturn(PETSC_SUCCESS);
1158 }
1159 
1160 /*
1161   This only works correctly for square matrices where the subblock A->A is the
1162    diagonal block
1163 */
1164 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1165 {
1166   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1167 
1168   PetscFunctionBegin;
1169   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1170   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1171   PetscCall(MatGetDiagonal(a->A, v));
1172   PetscFunctionReturn(PETSC_SUCCESS);
1173 }
1174 
1175 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1176 {
1177   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1178 
1179   PetscFunctionBegin;
1180   PetscCall(MatScale(a->A, aa));
1181   PetscCall(MatScale(a->B, aa));
1182   PetscFunctionReturn(PETSC_SUCCESS);
1183 }
1184 
1185 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1186 {
1187   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1188   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1189   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1190   const PetscInt    *garray = aij->garray;
1191   const PetscScalar *aa, *ba;
1192   PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
1193   PetscInt64         nz, hnz;
1194   PetscInt          *rowlens;
1195   PetscInt          *colidxs;
1196   PetscScalar       *matvals;
1197   PetscMPIInt        rank;
1198 
1199   PetscFunctionBegin;
1200   PetscCall(PetscViewerSetUp(viewer));
1201 
1202   M  = mat->rmap->N;
1203   N  = mat->cmap->N;
1204   m  = mat->rmap->n;
1205   rs = mat->rmap->rstart;
1206   cs = mat->cmap->rstart;
1207   nz = A->nz + B->nz;
1208 
1209   /* write matrix header */
1210   header[0] = MAT_FILE_CLASSID;
1211   header[1] = M;
1212   header[2] = N;
1213   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1214   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1215   if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
1216   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1217 
1218   /* fill in and store row lengths  */
1219   PetscCall(PetscMalloc1(m, &rowlens));
1220   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1221   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1222   PetscCall(PetscFree(rowlens));
1223 
1224   /* fill in and store column indices */
1225   PetscCall(PetscMalloc1(nz, &colidxs));
1226   for (cnt = 0, i = 0; i < m; i++) {
1227     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1228       if (garray[B->j[jb]] > cs) break;
1229       colidxs[cnt++] = garray[B->j[jb]];
1230     }
1231     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1232     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1233   }
1234   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1235   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1236   PetscCall(PetscFree(colidxs));
1237 
1238   /* fill in and store nonzero values */
1239   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1240   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1241   PetscCall(PetscMalloc1(nz, &matvals));
1242   for (cnt = 0, i = 0; i < m; i++) {
1243     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1244       if (garray[B->j[jb]] > cs) break;
1245       matvals[cnt++] = ba[jb];
1246     }
1247     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1248     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1249   }
1250   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1251   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1252   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1253   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1254   PetscCall(PetscFree(matvals));
1255 
1256   /* write block size option to the viewer's .info file */
1257   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1258   PetscFunctionReturn(PETSC_SUCCESS);
1259 }
1260 
1261 #include <petscdraw.h>
1262 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1263 {
1264   Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
1265   PetscMPIInt       rank = aij->rank, size = aij->size;
1266   PetscBool         isdraw, iascii, isbinary;
1267   PetscViewer       sviewer;
1268   PetscViewerFormat format;
1269 
1270   PetscFunctionBegin;
1271   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1272   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1273   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1274   if (iascii) {
1275     PetscCall(PetscViewerGetFormat(viewer, &format));
1276     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1277       PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz;
1278       PetscCall(PetscMalloc1(size, &nz));
1279       PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
1280       for (i = 0; i < size; i++) {
1281         nmax = PetscMax(nmax, nz[i]);
1282         nmin = PetscMin(nmin, nz[i]);
1283         navg += nz[i];
1284       }
1285       PetscCall(PetscFree(nz));
1286       navg = navg / size;
1287       PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
1288       PetscFunctionReturn(PETSC_SUCCESS);
1289     }
1290     PetscCall(PetscViewerGetFormat(viewer, &format));
1291     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1292       MatInfo   info;
1293       PetscInt *inodes = NULL;
1294 
1295       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1296       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
1297       PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
1298       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1299       if (!inodes) {
1300         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1301                                                      info.memory));
1302       } else {
1303         PetscCall(
1304           PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, info.memory));
1305       }
1306       PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
1307       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1308       PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
1309       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1310       PetscCall(PetscViewerFlush(viewer));
1311       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1312       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
1313       PetscCall(VecScatterView(aij->Mvctx, viewer));
1314       PetscFunctionReturn(PETSC_SUCCESS);
1315     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1316       PetscInt inodecount, inodelimit, *inodes;
1317       PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
1318       if (inodes) {
1319         PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
1320       } else {
1321         PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
1322       }
1323       PetscFunctionReturn(PETSC_SUCCESS);
1324     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1325       PetscFunctionReturn(PETSC_SUCCESS);
1326     }
1327   } else if (isbinary) {
1328     if (size == 1) {
1329       PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1330       PetscCall(MatView(aij->A, viewer));
1331     } else {
1332       PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
1333     }
1334     PetscFunctionReturn(PETSC_SUCCESS);
1335   } else if (iascii && size == 1) {
1336     PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1337     PetscCall(MatView(aij->A, viewer));
1338     PetscFunctionReturn(PETSC_SUCCESS);
1339   } else if (isdraw) {
1340     PetscDraw draw;
1341     PetscBool isnull;
1342     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1343     PetscCall(PetscDrawIsNull(draw, &isnull));
1344     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1345   }
1346 
1347   { /* assemble the entire matrix onto first processor */
1348     Mat A = NULL, Av;
1349     IS  isrow, iscol;
1350 
1351     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
1352     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
1353     PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
1354     PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
1355     /*  The commented code uses MatCreateSubMatrices instead */
1356     /*
1357     Mat *AA, A = NULL, Av;
1358     IS  isrow,iscol;
1359 
1360     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
1361     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
1362     PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
1363     if (rank == 0) {
1364        PetscCall(PetscObjectReference((PetscObject)AA[0]));
1365        A    = AA[0];
1366        Av   = AA[0];
1367     }
1368     PetscCall(MatDestroySubMatrices(1,&AA));
1369 */
1370     PetscCall(ISDestroy(&iscol));
1371     PetscCall(ISDestroy(&isrow));
1372     /*
1373        Everyone has to call to draw the matrix since the graphics waits are
1374        synchronized across all processors that share the PetscDraw object
1375     */
1376     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1377     if (rank == 0) {
1378       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
1379       PetscCall(MatView_SeqAIJ(Av, sviewer));
1380     }
1381     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1382     PetscCall(MatDestroy(&A));
1383   }
1384   PetscFunctionReturn(PETSC_SUCCESS);
1385 }
1386 
1387 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1388 {
1389   PetscBool iascii, isdraw, issocket, isbinary;
1390 
1391   PetscFunctionBegin;
1392   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1393   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1394   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1395   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1396   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1397   PetscFunctionReturn(PETSC_SUCCESS);
1398 }
1399 
1400 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
1401 {
1402   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
1403   Vec         bb1 = NULL;
1404   PetscBool   hasop;
1405 
1406   PetscFunctionBegin;
1407   if (flag == SOR_APPLY_UPPER) {
1408     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1409     PetscFunctionReturn(PETSC_SUCCESS);
1410   }
1411 
1412   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));
1413 
1414   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1415     if (flag & SOR_ZERO_INITIAL_GUESS) {
1416       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1417       its--;
1418     }
1419 
1420     while (its--) {
1421       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1422       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1423 
1424       /* update rhs: bb1 = bb - B*x */
1425       PetscCall(VecScale(mat->lvec, -1.0));
1426       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1427 
1428       /* local sweep */
1429       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
1430     }
1431   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1432     if (flag & SOR_ZERO_INITIAL_GUESS) {
1433       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1434       its--;
1435     }
1436     while (its--) {
1437       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1438       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1439 
1440       /* update rhs: bb1 = bb - B*x */
1441       PetscCall(VecScale(mat->lvec, -1.0));
1442       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1443 
1444       /* local sweep */
1445       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
1446     }
1447   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1448     if (flag & SOR_ZERO_INITIAL_GUESS) {
1449       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1450       its--;
1451     }
1452     while (its--) {
1453       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1454       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1455 
1456       /* update rhs: bb1 = bb - B*x */
1457       PetscCall(VecScale(mat->lvec, -1.0));
1458       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1459 
1460       /* local sweep */
1461       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
1462     }
1463   } else if (flag & SOR_EISENSTAT) {
1464     Vec xx1;
1465 
1466     PetscCall(VecDuplicate(bb, &xx1));
1467     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
1468 
1469     PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1470     PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1471     if (!mat->diag) {
1472       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
1473       PetscCall(MatGetDiagonal(matin, mat->diag));
1474     }
1475     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
1476     if (hasop) {
1477       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
1478     } else {
1479       PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
1480     }
1481     PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));
1482 
1483     PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));
1484 
1485     /* local sweep */
1486     PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
1487     PetscCall(VecAXPY(xx, 1.0, xx1));
1488     PetscCall(VecDestroy(&xx1));
1489   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");
1490 
1491   PetscCall(VecDestroy(&bb1));
1492 
1493   matin->factorerrortype = mat->A->factorerrortype;
1494   PetscFunctionReturn(PETSC_SUCCESS);
1495 }
1496 
1497 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
1498 {
1499   Mat             aA, aB, Aperm;
1500   const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
1501   PetscScalar    *aa, *ba;
1502   PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
1503   PetscSF         rowsf, sf;
1504   IS              parcolp = NULL;
1505   PetscBool       done;
1506 
1507   PetscFunctionBegin;
1508   PetscCall(MatGetLocalSize(A, &m, &n));
1509   PetscCall(ISGetIndices(rowp, &rwant));
1510   PetscCall(ISGetIndices(colp, &cwant));
1511   PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));
1512 
1513   /* Invert row permutation to find out where my rows should go */
1514   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
1515   PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
1516   PetscCall(PetscSFSetFromOptions(rowsf));
1517   for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
1518   PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1519   PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1520 
1521   /* Invert column permutation to find out where my columns should go */
1522   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1523   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
1524   PetscCall(PetscSFSetFromOptions(sf));
1525   for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
1526   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1527   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1528   PetscCall(PetscSFDestroy(&sf));
1529 
1530   PetscCall(ISRestoreIndices(rowp, &rwant));
1531   PetscCall(ISRestoreIndices(colp, &cwant));
1532   PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));
1533 
1534   /* Find out where my gcols should go */
1535   PetscCall(MatGetSize(aB, NULL, &ng));
1536   PetscCall(PetscMalloc1(ng, &gcdest));
1537   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1538   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
1539   PetscCall(PetscSFSetFromOptions(sf));
1540   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1541   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1542   PetscCall(PetscSFDestroy(&sf));
1543 
1544   PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
1545   PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1546   PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1547   for (i = 0; i < m; i++) {
1548     PetscInt    row = rdest[i];
1549     PetscMPIInt rowner;
1550     PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
1551     for (j = ai[i]; j < ai[i + 1]; j++) {
1552       PetscInt    col = cdest[aj[j]];
1553       PetscMPIInt cowner;
1554       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
1555       if (rowner == cowner) dnnz[i]++;
1556       else onnz[i]++;
1557     }
1558     for (j = bi[i]; j < bi[i + 1]; j++) {
1559       PetscInt    col = gcdest[bj[j]];
1560       PetscMPIInt cowner;
1561       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
1562       if (rowner == cowner) dnnz[i]++;
1563       else onnz[i]++;
1564     }
1565   }
1566   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1567   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1568   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1569   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1570   PetscCall(PetscSFDestroy(&rowsf));
1571 
1572   PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
1573   PetscCall(MatSeqAIJGetArray(aA, &aa));
1574   PetscCall(MatSeqAIJGetArray(aB, &ba));
1575   for (i = 0; i < m; i++) {
1576     PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
1577     PetscInt  j0, rowlen;
1578     rowlen = ai[i + 1] - ai[i];
1579     for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
1580       for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
1581       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
1582     }
1583     rowlen = bi[i + 1] - bi[i];
1584     for (j0 = j = 0; j < rowlen; j0 = j) {
1585       for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
1586       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
1587     }
1588   }
1589   PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
1590   PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
1591   PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1592   PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1593   PetscCall(MatSeqAIJRestoreArray(aA, &aa));
1594   PetscCall(MatSeqAIJRestoreArray(aB, &ba));
1595   PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
1596   PetscCall(PetscFree3(work, rdest, cdest));
1597   PetscCall(PetscFree(gcdest));
1598   if (parcolp) PetscCall(ISDestroy(&colp));
1599   *B = Aperm;
1600   PetscFunctionReturn(PETSC_SUCCESS);
1601 }
1602 
1603 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1604 {
1605   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1606 
1607   PetscFunctionBegin;
1608   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1609   if (ghosts) *ghosts = aij->garray;
1610   PetscFunctionReturn(PETSC_SUCCESS);
1611 }
1612 
1613 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1614 {
1615   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1616   Mat            A = mat->A, B = mat->B;
1617   PetscLogDouble isend[5], irecv[5];
1618 
1619   PetscFunctionBegin;
1620   info->block_size = 1.0;
1621   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1622 
1623   isend[0] = info->nz_used;
1624   isend[1] = info->nz_allocated;
1625   isend[2] = info->nz_unneeded;
1626   isend[3] = info->memory;
1627   isend[4] = info->mallocs;
1628 
1629   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1630 
1631   isend[0] += info->nz_used;
1632   isend[1] += info->nz_allocated;
1633   isend[2] += info->nz_unneeded;
1634   isend[3] += info->memory;
1635   isend[4] += info->mallocs;
1636   if (flag == MAT_LOCAL) {
1637     info->nz_used      = isend[0];
1638     info->nz_allocated = isend[1];
1639     info->nz_unneeded  = isend[2];
1640     info->memory       = isend[3];
1641     info->mallocs      = isend[4];
1642   } else if (flag == MAT_GLOBAL_MAX) {
1643     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1644 
1645     info->nz_used      = irecv[0];
1646     info->nz_allocated = irecv[1];
1647     info->nz_unneeded  = irecv[2];
1648     info->memory       = irecv[3];
1649     info->mallocs      = irecv[4];
1650   } else if (flag == MAT_GLOBAL_SUM) {
1651     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1652 
1653     info->nz_used      = irecv[0];
1654     info->nz_allocated = irecv[1];
1655     info->nz_unneeded  = irecv[2];
1656     info->memory       = irecv[3];
1657     info->mallocs      = irecv[4];
1658   }
1659   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1660   info->fill_ratio_needed = 0;
1661   info->factor_mallocs    = 0;
1662   PetscFunctionReturn(PETSC_SUCCESS);
1663 }
1664 
1665 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
1666 {
1667   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1668 
1669   PetscFunctionBegin;
1670   switch (op) {
1671   case MAT_NEW_NONZERO_LOCATIONS:
1672   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1673   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1674   case MAT_KEEP_NONZERO_PATTERN:
1675   case MAT_NEW_NONZERO_LOCATION_ERR:
1676   case MAT_USE_INODES:
1677   case MAT_IGNORE_ZERO_ENTRIES:
1678   case MAT_FORM_EXPLICIT_TRANSPOSE:
1679     MatCheckPreallocated(A, 1);
1680     PetscCall(MatSetOption(a->A, op, flg));
1681     PetscCall(MatSetOption(a->B, op, flg));
1682     break;
1683   case MAT_ROW_ORIENTED:
1684     MatCheckPreallocated(A, 1);
1685     a->roworiented = flg;
1686 
1687     PetscCall(MatSetOption(a->A, op, flg));
1688     PetscCall(MatSetOption(a->B, op, flg));
1689     break;
1690   case MAT_IGNORE_OFF_PROC_ENTRIES:
1691     a->donotstash = flg;
1692     break;
1693   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1694   case MAT_SPD:
1695   case MAT_SYMMETRIC:
1696   case MAT_STRUCTURALLY_SYMMETRIC:
1697   case MAT_HERMITIAN:
1698   case MAT_SYMMETRY_ETERNAL:
1699   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1700   case MAT_SPD_ETERNAL:
1701     /* if the diagonal matrix is square it inherits some of the properties above */
1702     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
1703     break;
1704   case MAT_SUBMAT_SINGLEIS:
1705     A->submat_singleis = flg;
1706     break;
1707   default:
1708     break;
1709   }
1710   PetscFunctionReturn(PETSC_SUCCESS);
1711 }
1712 
1713 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1714 {
1715   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
1716   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1717   PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
1718   PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
1719   PetscInt    *cmap, *idx_p;
1720 
1721   PetscFunctionBegin;
1722   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1723   mat->getrowactive = PETSC_TRUE;
1724 
1725   if (!mat->rowvalues && (idx || v)) {
1726     /*
1727         allocate enough space to hold information from the longest row.
1728     */
1729     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
1730     PetscInt    max = 1, tmp;
1731     for (i = 0; i < matin->rmap->n; i++) {
1732       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1733       if (max < tmp) max = tmp;
1734     }
1735     PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
1736   }
1737 
1738   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1739   lrow = row - rstart;
1740 
1741   pvA = &vworkA;
1742   pcA = &cworkA;
1743   pvB = &vworkB;
1744   pcB = &cworkB;
1745   if (!v) {
1746     pvA = NULL;
1747     pvB = NULL;
1748   }
1749   if (!idx) {
1750     pcA = NULL;
1751     if (!v) pcB = NULL;
1752   }
1753   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1754   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1755   nztot = nzA + nzB;
1756 
1757   cmap = mat->garray;
1758   if (v || idx) {
1759     if (nztot) {
1760       /* Sort by increasing column numbers, assuming A and B already sorted */
1761       PetscInt imark = -1;
1762       if (v) {
1763         *v = v_p = mat->rowvalues;
1764         for (i = 0; i < nzB; i++) {
1765           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1766           else break;
1767         }
1768         imark = i;
1769         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1770         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1771       }
1772       if (idx) {
1773         *idx = idx_p = mat->rowindices;
1774         if (imark > -1) {
1775           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
1776         } else {
1777           for (i = 0; i < nzB; i++) {
1778             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1779             else break;
1780           }
1781           imark = i;
1782         }
1783         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
1784         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
1785       }
1786     } else {
1787       if (idx) *idx = NULL;
1788       if (v) *v = NULL;
1789     }
1790   }
1791   *nz = nztot;
1792   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1793   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1794   PetscFunctionReturn(PETSC_SUCCESS);
1795 }
1796 
1797 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1798 {
1799   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1800 
1801   PetscFunctionBegin;
1802   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1803   aij->getrowactive = PETSC_FALSE;
1804   PetscFunctionReturn(PETSC_SUCCESS);
1805 }
1806 
1807 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
1808 {
1809   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
1810   Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
1811   PetscInt         i, j, cstart = mat->cmap->rstart;
1812   PetscReal        sum = 0.0;
1813   const MatScalar *v, *amata, *bmata;
1814 
1815   PetscFunctionBegin;
1816   if (aij->size == 1) {
1817     PetscCall(MatNorm(aij->A, type, norm));
1818   } else {
1819     PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
1820     PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
1821     if (type == NORM_FROBENIUS) {
1822       v = amata;
1823       for (i = 0; i < amat->nz; i++) {
1824         sum += PetscRealPart(PetscConj(*v) * (*v));
1825         v++;
1826       }
1827       v = bmata;
1828       for (i = 0; i < bmat->nz; i++) {
1829         sum += PetscRealPart(PetscConj(*v) * (*v));
1830         v++;
1831       }
1832       PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1833       *norm = PetscSqrtReal(*norm);
1834       PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
1835     } else if (type == NORM_1) { /* max column norm */
1836       PetscReal *tmp;
1837       PetscInt  *jj, *garray = aij->garray;
1838       PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
1839       *norm = 0.0;
1840       v     = amata;
1841       jj    = amat->j;
1842       for (j = 0; j < amat->nz; j++) {
1843         tmp[cstart + *jj++] += PetscAbsScalar(*v);
1844         v++;
1845       }
1846       v  = bmata;
1847       jj = bmat->j;
1848       for (j = 0; j < bmat->nz; j++) {
1849         tmp[garray[*jj++]] += PetscAbsScalar(*v);
1850         v++;
1851       }
1852       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1853       for (j = 0; j < mat->cmap->N; j++) {
1854         if (tmp[j] > *norm) *norm = tmp[j];
1855       }
1856       PetscCall(PetscFree(tmp));
1857       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1858     } else if (type == NORM_INFINITY) { /* max row norm */
1859       PetscReal ntemp = 0.0;
1860       for (j = 0; j < aij->A->rmap->n; j++) {
1861         v   = PetscSafePointerPlusOffset(amata, amat->i[j]);
1862         sum = 0.0;
1863         for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
1864           sum += PetscAbsScalar(*v);
1865           v++;
1866         }
1867         v = PetscSafePointerPlusOffset(bmata, bmat->i[j]);
1868         for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
1869           sum += PetscAbsScalar(*v);
1870           v++;
1871         }
1872         if (sum > ntemp) ntemp = sum;
1873       }
1874       PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
1875       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1876     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
1877     PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
1878     PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
1879   }
1880   PetscFunctionReturn(PETSC_SUCCESS);
1881 }
1882 
1883 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
1884 {
1885   Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
1886   Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
1887   PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
1888   const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
1889   Mat              B, A_diag, *B_diag;
1890   const MatScalar *pbv, *bv;
1891 
1892   PetscFunctionBegin;
1893   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1894   ma = A->rmap->n;
1895   na = A->cmap->n;
1896   mb = a->B->rmap->n;
1897   nb = a->B->cmap->n;
1898   ai = Aloc->i;
1899   aj = Aloc->j;
1900   bi = Bloc->i;
1901   bj = Bloc->j;
1902   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1903     PetscInt            *d_nnz, *g_nnz, *o_nnz;
1904     PetscSFNode         *oloc;
1905     PETSC_UNUSED PetscSF sf;
1906 
1907     PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
1908     /* compute d_nnz for preallocation */
1909     PetscCall(PetscArrayzero(d_nnz, na));
1910     for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
1911     /* compute local off-diagonal contributions */
1912     PetscCall(PetscArrayzero(g_nnz, nb));
1913     for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
1914     /* map those to global */
1915     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1916     PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
1917     PetscCall(PetscSFSetFromOptions(sf));
1918     PetscCall(PetscArrayzero(o_nnz, na));
1919     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1920     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1921     PetscCall(PetscSFDestroy(&sf));
1922 
1923     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1924     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1925     PetscCall(MatSetBlockSizes(B, A->cmap->bs, A->rmap->bs));
1926     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1927     PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
1928     PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
1929   } else {
1930     B = *matout;
1931     PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
1932   }
1933 
1934   b           = (Mat_MPIAIJ *)B->data;
1935   A_diag      = a->A;
1936   B_diag      = &b->A;
1937   sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
1938   A_diag_ncol = A_diag->cmap->N;
1939   B_diag_ilen = sub_B_diag->ilen;
1940   B_diag_i    = sub_B_diag->i;
1941 
1942   /* Set ilen for diagonal of B */
1943   for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];
1944 
1945   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1946   very quickly (=without using MatSetValues), because all writes are local. */
1947   PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
1948   PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));
1949 
1950   /* copy over the B part */
1951   PetscCall(PetscMalloc1(bi[mb], &cols));
1952   PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
1953   pbv = bv;
1954   row = A->rmap->rstart;
1955   for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
1956   cols_tmp = cols;
1957   for (i = 0; i < mb; i++) {
1958     ncol = bi[i + 1] - bi[i];
1959     PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
1960     row++;
1961     if (pbv) pbv += ncol;
1962     if (cols_tmp) cols_tmp += ncol;
1963   }
1964   PetscCall(PetscFree(cols));
1965   PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));
1966 
1967   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1968   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1969   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1970     *matout = B;
1971   } else {
1972     PetscCall(MatHeaderMerge(A, &B));
1973   }
1974   PetscFunctionReturn(PETSC_SUCCESS);
1975 }
1976 
1977 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
1978 {
1979   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1980   Mat         a = aij->A, b = aij->B;
1981   PetscInt    s1, s2, s3;
1982 
1983   PetscFunctionBegin;
1984   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1985   if (rr) {
1986     PetscCall(VecGetLocalSize(rr, &s1));
1987     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1988     /* Overlap communication with computation. */
1989     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1990   }
1991   if (ll) {
1992     PetscCall(VecGetLocalSize(ll, &s1));
1993     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1994     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1995   }
1996   /* scale  the diagonal block */
1997   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1998 
1999   if (rr) {
2000     /* Do a scatter end and then right scale the off-diagonal block */
2001     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
2002     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
2003   }
2004   PetscFunctionReturn(PETSC_SUCCESS);
2005 }
2006 
2007 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2008 {
2009   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2010 
2011   PetscFunctionBegin;
2012   PetscCall(MatSetUnfactored(a->A));
2013   PetscFunctionReturn(PETSC_SUCCESS);
2014 }
2015 
2016 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2017 {
2018   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2019   Mat         a, b, c, d;
2020   PetscBool   flg;
2021 
2022   PetscFunctionBegin;
2023   a = matA->A;
2024   b = matA->B;
2025   c = matB->A;
2026   d = matB->B;
2027 
2028   PetscCall(MatEqual(a, c, &flg));
2029   if (flg) PetscCall(MatEqual(b, d, &flg));
2030   PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2031   PetscFunctionReturn(PETSC_SUCCESS);
2032 }
2033 
2034 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2035 {
2036   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2037   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2038 
2039   PetscFunctionBegin;
2040   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2041   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2042     /* because of the column compression in the off-processor part of the matrix a->B,
2043        the number of columns in a->B and b->B may be different, hence we cannot call
2044        the MatCopy() directly on the two parts. If need be, we can provide a more
2045        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2046        then copying the submatrices */
2047     PetscCall(MatCopy_Basic(A, B, str));
2048   } else {
2049     PetscCall(MatCopy(a->A, b->A, str));
2050     PetscCall(MatCopy(a->B, b->B, str));
2051   }
2052   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2053   PetscFunctionReturn(PETSC_SUCCESS);
2054 }
2055 
2056 /*
2057    Computes the number of nonzeros per row needed for preallocation when X and Y
2058    have different nonzero structure.
2059 */
2060 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2061 {
2062   PetscInt i, j, k, nzx, nzy;
2063 
2064   PetscFunctionBegin;
2065   /* Set the number of nonzeros in the new matrix */
2066   for (i = 0; i < m; i++) {
2067     const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]);
2068     nzx    = xi[i + 1] - xi[i];
2069     nzy    = yi[i + 1] - yi[i];
2070     nnz[i] = 0;
2071     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2072       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2073       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2074       nnz[i]++;
2075     }
2076     for (; k < nzy; k++) nnz[i]++;
2077   }
2078   PetscFunctionReturn(PETSC_SUCCESS);
2079 }
2080 
2081 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2082 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2083 {
2084   PetscInt    m = Y->rmap->N;
2085   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2086   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2087 
2088   PetscFunctionBegin;
2089   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2090   PetscFunctionReturn(PETSC_SUCCESS);
2091 }
2092 
2093 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2094 {
2095   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;
2096 
2097   PetscFunctionBegin;
2098   if (str == SAME_NONZERO_PATTERN) {
2099     PetscCall(MatAXPY(yy->A, a, xx->A, str));
2100     PetscCall(MatAXPY(yy->B, a, xx->B, str));
2101   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2102     PetscCall(MatAXPY_Basic(Y, a, X, str));
2103   } else {
2104     Mat       B;
2105     PetscInt *nnz_d, *nnz_o;
2106 
2107     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
2108     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
2109     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2110     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2111     PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
2112     PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
2113     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
2114     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
2115     PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
2116     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2117     PetscCall(MatHeaderMerge(Y, &B));
2118     PetscCall(PetscFree(nnz_d));
2119     PetscCall(PetscFree(nnz_o));
2120   }
2121   PetscFunctionReturn(PETSC_SUCCESS);
2122 }
2123 
2124 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2125 
2126 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2127 {
2128   PetscFunctionBegin;
2129   if (PetscDefined(USE_COMPLEX)) {
2130     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2131 
2132     PetscCall(MatConjugate_SeqAIJ(aij->A));
2133     PetscCall(MatConjugate_SeqAIJ(aij->B));
2134   }
2135   PetscFunctionReturn(PETSC_SUCCESS);
2136 }
2137 
2138 static PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2139 {
2140   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2141 
2142   PetscFunctionBegin;
2143   PetscCall(MatRealPart(a->A));
2144   PetscCall(MatRealPart(a->B));
2145   PetscFunctionReturn(PETSC_SUCCESS);
2146 }
2147 
2148 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2149 {
2150   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2151 
2152   PetscFunctionBegin;
2153   PetscCall(MatImaginaryPart(a->A));
2154   PetscCall(MatImaginaryPart(a->B));
2155   PetscFunctionReturn(PETSC_SUCCESS);
2156 }
2157 
2158 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2159 {
2160   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2161   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2162   PetscScalar       *vv;
2163   Vec                vB, vA;
2164   const PetscScalar *va, *vb;
2165 
2166   PetscFunctionBegin;
2167   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2168   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2169 
2170   PetscCall(VecGetArrayRead(vA, &va));
2171   if (idx) {
2172     for (i = 0; i < m; i++) {
2173       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2174     }
2175   }
2176 
2177   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2178   PetscCall(PetscMalloc1(m, &idxb));
2179   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2180 
2181   PetscCall(VecGetArrayWrite(v, &vv));
2182   PetscCall(VecGetArrayRead(vB, &vb));
2183   for (i = 0; i < m; i++) {
2184     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2185       vv[i] = vb[i];
2186       if (idx) idx[i] = a->garray[idxb[i]];
2187     } else {
2188       vv[i] = va[i];
2189       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2190     }
2191   }
2192   PetscCall(VecRestoreArrayWrite(v, &vv));
2193   PetscCall(VecRestoreArrayRead(vA, &va));
2194   PetscCall(VecRestoreArrayRead(vB, &vb));
2195   PetscCall(PetscFree(idxb));
2196   PetscCall(VecDestroy(&vA));
2197   PetscCall(VecDestroy(&vB));
2198   PetscFunctionReturn(PETSC_SUCCESS);
2199 }
2200 
2201 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v)
2202 {
2203   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2204   Vec         vB, vA;
2205 
2206   PetscFunctionBegin;
2207   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2208   PetscCall(MatGetRowSumAbs(a->A, vA));
2209   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2210   PetscCall(MatGetRowSumAbs(a->B, vB));
2211   PetscCall(VecAXPY(vA, 1.0, vB));
2212   PetscCall(VecDestroy(&vB));
2213   PetscCall(VecCopy(vA, v));
2214   PetscCall(VecDestroy(&vA));
2215   PetscFunctionReturn(PETSC_SUCCESS);
2216 }
2217 
2218 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2219 {
2220   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2221   PetscInt           m = A->rmap->n, n = A->cmap->n;
2222   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2223   PetscInt          *cmap = mat->garray;
2224   PetscInt          *diagIdx, *offdiagIdx;
2225   Vec                diagV, offdiagV;
2226   PetscScalar       *a, *diagA, *offdiagA;
2227   const PetscScalar *ba, *bav;
2228   PetscInt           r, j, col, ncols, *bi, *bj;
2229   Mat                B = mat->B;
2230   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2231 
2232   PetscFunctionBegin;
2233   /* When a process holds entire A and other processes have no entry */
2234   if (A->cmap->N == n) {
2235     PetscCall(VecGetArrayWrite(v, &diagA));
2236     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2237     PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
2238     PetscCall(VecDestroy(&diagV));
2239     PetscCall(VecRestoreArrayWrite(v, &diagA));
2240     PetscFunctionReturn(PETSC_SUCCESS);
2241   } else if (n == 0) {
2242     if (m) {
2243       PetscCall(VecGetArrayWrite(v, &a));
2244       for (r = 0; r < m; r++) {
2245         a[r] = 0.0;
2246         if (idx) idx[r] = -1;
2247       }
2248       PetscCall(VecRestoreArrayWrite(v, &a));
2249     }
2250     PetscFunctionReturn(PETSC_SUCCESS);
2251   }
2252 
2253   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2254   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2255   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2256   PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));
2257 
2258   /* Get offdiagIdx[] for implicit 0.0 */
2259   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2260   ba = bav;
2261   bi = b->i;
2262   bj = b->j;
2263   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2264   for (r = 0; r < m; r++) {
2265     ncols = bi[r + 1] - bi[r];
2266     if (ncols == A->cmap->N - n) { /* Brow is dense */
2267       offdiagA[r]   = *ba;
2268       offdiagIdx[r] = cmap[0];
2269     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2270       offdiagA[r] = 0.0;
2271 
2272       /* Find first hole in the cmap */
2273       for (j = 0; j < ncols; j++) {
2274         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2275         if (col > j && j < cstart) {
2276           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2277           break;
2278         } else if (col > j + n && j >= cstart) {
2279           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2280           break;
2281         }
2282       }
2283       if (j == ncols && ncols < A->cmap->N - n) {
2284         /* a hole is outside compressed Bcols */
2285         if (ncols == 0) {
2286           if (cstart) {
2287             offdiagIdx[r] = 0;
2288           } else offdiagIdx[r] = cend;
2289         } else { /* ncols > 0 */
2290           offdiagIdx[r] = cmap[ncols - 1] + 1;
2291           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2292         }
2293       }
2294     }
2295 
2296     for (j = 0; j < ncols; j++) {
2297       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
2298         offdiagA[r]   = *ba;
2299         offdiagIdx[r] = cmap[*bj];
2300       }
2301       ba++;
2302       bj++;
2303     }
2304   }
2305 
2306   PetscCall(VecGetArrayWrite(v, &a));
2307   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2308   for (r = 0; r < m; ++r) {
2309     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2310       a[r] = diagA[r];
2311       if (idx) idx[r] = cstart + diagIdx[r];
2312     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2313       a[r] = diagA[r];
2314       if (idx) {
2315         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2316           idx[r] = cstart + diagIdx[r];
2317         } else idx[r] = offdiagIdx[r];
2318       }
2319     } else {
2320       a[r] = offdiagA[r];
2321       if (idx) idx[r] = offdiagIdx[r];
2322     }
2323   }
2324   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2325   PetscCall(VecRestoreArrayWrite(v, &a));
2326   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2327   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2328   PetscCall(VecDestroy(&diagV));
2329   PetscCall(VecDestroy(&offdiagV));
2330   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2331   PetscFunctionReturn(PETSC_SUCCESS);
2332 }
2333 
2334 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2335 {
2336   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2337   PetscInt           m = A->rmap->n, n = A->cmap->n;
2338   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2339   PetscInt          *cmap = mat->garray;
2340   PetscInt          *diagIdx, *offdiagIdx;
2341   Vec                diagV, offdiagV;
2342   PetscScalar       *a, *diagA, *offdiagA;
2343   const PetscScalar *ba, *bav;
2344   PetscInt           r, j, col, ncols, *bi, *bj;
2345   Mat                B = mat->B;
2346   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2347 
2348   PetscFunctionBegin;
2349   /* When a process holds entire A and other processes have no entry */
2350   if (A->cmap->N == n) {
2351     PetscCall(VecGetArrayWrite(v, &diagA));
2352     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2353     PetscCall(MatGetRowMin(mat->A, diagV, idx));
2354     PetscCall(VecDestroy(&diagV));
2355     PetscCall(VecRestoreArrayWrite(v, &diagA));
2356     PetscFunctionReturn(PETSC_SUCCESS);
2357   } else if (n == 0) {
2358     if (m) {
2359       PetscCall(VecGetArrayWrite(v, &a));
2360       for (r = 0; r < m; r++) {
2361         a[r] = PETSC_MAX_REAL;
2362         if (idx) idx[r] = -1;
2363       }
2364       PetscCall(VecRestoreArrayWrite(v, &a));
2365     }
2366     PetscFunctionReturn(PETSC_SUCCESS);
2367   }
2368 
2369   PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
2370   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2371   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2372   PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));
2373 
2374   /* Get offdiagIdx[] for implicit 0.0 */
2375   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2376   ba = bav;
2377   bi = b->i;
2378   bj = b->j;
2379   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2380   for (r = 0; r < m; r++) {
2381     ncols = bi[r + 1] - bi[r];
2382     if (ncols == A->cmap->N - n) { /* Brow is dense */
2383       offdiagA[r]   = *ba;
2384       offdiagIdx[r] = cmap[0];
2385     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2386       offdiagA[r] = 0.0;
2387 
2388       /* Find first hole in the cmap */
2389       for (j = 0; j < ncols; j++) {
2390         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2391         if (col > j && j < cstart) {
2392           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2393           break;
2394         } else if (col > j + n && j >= cstart) {
2395           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2396           break;
2397         }
2398       }
2399       if (j == ncols && ncols < A->cmap->N - n) {
2400         /* a hole is outside compressed Bcols */
2401         if (ncols == 0) {
2402           if (cstart) {
2403             offdiagIdx[r] = 0;
2404           } else offdiagIdx[r] = cend;
2405         } else { /* ncols > 0 */
2406           offdiagIdx[r] = cmap[ncols - 1] + 1;
2407           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2408         }
2409       }
2410     }
2411 
2412     for (j = 0; j < ncols; j++) {
2413       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
2414         offdiagA[r]   = *ba;
2415         offdiagIdx[r] = cmap[*bj];
2416       }
2417       ba++;
2418       bj++;
2419     }
2420   }
2421 
2422   PetscCall(VecGetArrayWrite(v, &a));
2423   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2424   for (r = 0; r < m; ++r) {
2425     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2426       a[r] = diagA[r];
2427       if (idx) idx[r] = cstart + diagIdx[r];
2428     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2429       a[r] = diagA[r];
2430       if (idx) {
2431         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2432           idx[r] = cstart + diagIdx[r];
2433         } else idx[r] = offdiagIdx[r];
2434       }
2435     } else {
2436       a[r] = offdiagA[r];
2437       if (idx) idx[r] = offdiagIdx[r];
2438     }
2439   }
2440   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2441   PetscCall(VecRestoreArrayWrite(v, &a));
2442   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2443   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2444   PetscCall(VecDestroy(&diagV));
2445   PetscCall(VecDestroy(&offdiagV));
2446   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2447   PetscFunctionReturn(PETSC_SUCCESS);
2448 }
2449 
2450 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2451 {
2452   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2453   PetscInt           m = A->rmap->n, n = A->cmap->n;
2454   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2455   PetscInt          *cmap = mat->garray;
2456   PetscInt          *diagIdx, *offdiagIdx;
2457   Vec                diagV, offdiagV;
2458   PetscScalar       *a, *diagA, *offdiagA;
2459   const PetscScalar *ba, *bav;
2460   PetscInt           r, j, col, ncols, *bi, *bj;
2461   Mat                B = mat->B;
2462   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2463 
2464   PetscFunctionBegin;
2465   /* When a process holds entire A and other processes have no entry */
2466   if (A->cmap->N == n) {
2467     PetscCall(VecGetArrayWrite(v, &diagA));
2468     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2469     PetscCall(MatGetRowMax(mat->A, diagV, idx));
2470     PetscCall(VecDestroy(&diagV));
2471     PetscCall(VecRestoreArrayWrite(v, &diagA));
2472     PetscFunctionReturn(PETSC_SUCCESS);
2473   } else if (n == 0) {
2474     if (m) {
2475       PetscCall(VecGetArrayWrite(v, &a));
2476       for (r = 0; r < m; r++) {
2477         a[r] = PETSC_MIN_REAL;
2478         if (idx) idx[r] = -1;
2479       }
2480       PetscCall(VecRestoreArrayWrite(v, &a));
2481     }
2482     PetscFunctionReturn(PETSC_SUCCESS);
2483   }
2484 
2485   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2486   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2487   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2488   PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));
2489 
2490   /* Get offdiagIdx[] for implicit 0.0 */
2491   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2492   ba = bav;
2493   bi = b->i;
2494   bj = b->j;
2495   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2496   for (r = 0; r < m; r++) {
2497     ncols = bi[r + 1] - bi[r];
2498     if (ncols == A->cmap->N - n) { /* Brow is dense */
2499       offdiagA[r]   = *ba;
2500       offdiagIdx[r] = cmap[0];
2501     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2502       offdiagA[r] = 0.0;
2503 
2504       /* Find first hole in the cmap */
2505       for (j = 0; j < ncols; j++) {
2506         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2507         if (col > j && j < cstart) {
2508           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2509           break;
2510         } else if (col > j + n && j >= cstart) {
2511           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2512           break;
2513         }
2514       }
2515       if (j == ncols && ncols < A->cmap->N - n) {
2516         /* a hole is outside compressed Bcols */
2517         if (ncols == 0) {
2518           if (cstart) {
2519             offdiagIdx[r] = 0;
2520           } else offdiagIdx[r] = cend;
2521         } else { /* ncols > 0 */
2522           offdiagIdx[r] = cmap[ncols - 1] + 1;
2523           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2524         }
2525       }
2526     }
2527 
2528     for (j = 0; j < ncols; j++) {
2529       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
2530         offdiagA[r]   = *ba;
2531         offdiagIdx[r] = cmap[*bj];
2532       }
2533       ba++;
2534       bj++;
2535     }
2536   }
2537 
2538   PetscCall(VecGetArrayWrite(v, &a));
2539   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2540   for (r = 0; r < m; ++r) {
2541     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2542       a[r] = diagA[r];
2543       if (idx) idx[r] = cstart + diagIdx[r];
2544     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2545       a[r] = diagA[r];
2546       if (idx) {
2547         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2548           idx[r] = cstart + diagIdx[r];
2549         } else idx[r] = offdiagIdx[r];
2550       }
2551     } else {
2552       a[r] = offdiagA[r];
2553       if (idx) idx[r] = offdiagIdx[r];
2554     }
2555   }
2556   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2557   PetscCall(VecRestoreArrayWrite(v, &a));
2558   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2559   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2560   PetscCall(VecDestroy(&diagV));
2561   PetscCall(VecDestroy(&offdiagV));
2562   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2563   PetscFunctionReturn(PETSC_SUCCESS);
2564 }
2565 
2566 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2567 {
2568   Mat *dummy;
2569 
2570   PetscFunctionBegin;
2571   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2572   *newmat = *dummy;
2573   PetscCall(PetscFree(dummy));
2574   PetscFunctionReturn(PETSC_SUCCESS);
2575 }
2576 
2577 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2578 {
2579   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2580 
2581   PetscFunctionBegin;
2582   PetscCall(MatInvertBlockDiagonal(a->A, values));
2583   A->factorerrortype = a->A->factorerrortype;
2584   PetscFunctionReturn(PETSC_SUCCESS);
2585 }
2586 
2587 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2588 {
2589   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2590 
2591   PetscFunctionBegin;
2592   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2593   PetscCall(MatSetRandom(aij->A, rctx));
2594   if (x->assembled) {
2595     PetscCall(MatSetRandom(aij->B, rctx));
2596   } else {
2597     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2598   }
2599   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2600   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2601   PetscFunctionReturn(PETSC_SUCCESS);
2602 }
2603 
2604 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2605 {
2606   PetscFunctionBegin;
2607   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2608   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2609   PetscFunctionReturn(PETSC_SUCCESS);
2610 }
2611 
2612 /*@
2613   MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2614 
2615   Not Collective
2616 
2617   Input Parameter:
2618 . A - the matrix
2619 
2620   Output Parameter:
2621 . nz - the number of nonzeros
2622 
2623   Level: advanced
2624 
2625 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2626 @*/
2627 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2628 {
2629   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2630   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2631   PetscBool   isaij;
2632 
2633   PetscFunctionBegin;
2634   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
2635   PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
2636   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2637   PetscFunctionReturn(PETSC_SUCCESS);
2638 }
2639 
2640 /*@
2641   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2642 
2643   Collective
2644 
2645   Input Parameters:
2646 + A  - the matrix
2647 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)
2648 
2649   Level: advanced
2650 
2651 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2652 @*/
2653 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
2654 {
2655   PetscFunctionBegin;
2656   PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
2657   PetscFunctionReturn(PETSC_SUCCESS);
2658 }
2659 
2660 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems PetscOptionsObject)
2661 {
2662   PetscBool sc = PETSC_FALSE, flg;
2663 
2664   PetscFunctionBegin;
2665   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2666   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2667   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2668   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2669   PetscOptionsHeadEnd();
2670   PetscFunctionReturn(PETSC_SUCCESS);
2671 }
2672 
2673 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
2674 {
2675   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
2676   Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;
2677 
2678   PetscFunctionBegin;
2679   if (!Y->preallocated) {
2680     PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
2681   } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
2682     PetscInt nonew = aij->nonew;
2683     PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
2684     aij->nonew = nonew;
2685   }
2686   PetscCall(MatShift_Basic(Y, a));
2687   PetscFunctionReturn(PETSC_SUCCESS);
2688 }
2689 
2690 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2691 {
2692   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2693 
2694   PetscFunctionBegin;
2695   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2696   PetscCall(MatMissingDiagonal(a->A, missing, d));
2697   if (d) {
2698     PetscInt rstart;
2699     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2700     *d += rstart;
2701   }
2702   PetscFunctionReturn(PETSC_SUCCESS);
2703 }
2704 
2705 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2706 {
2707   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2708 
2709   PetscFunctionBegin;
2710   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2711   PetscFunctionReturn(PETSC_SUCCESS);
2712 }
2713 
2714 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
2715 {
2716   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2717 
2718   PetscFunctionBegin;
2719   PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2720   PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2721   PetscFunctionReturn(PETSC_SUCCESS);
2722 }
2723 
/*
  Table of MPIAIJ implementations for struct _MatOps.

  The initializer is positional: each entry fills the next member of struct _MatOps in declaration
  order, so entries must never be reordered, inserted, or removed without matching the struct.
  The numbers in the inline comments mark slot positions to make counting easier. A NULL entry
  means no MPIAIJ-specific routine is supplied for that slot in this table.

  NOTE(review): presumably this table is copied into a matrix's ops when the MPIAIJ type is
  created -- confirm against the type constructor, which is not part of this section.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetRowMaxAbs_MPIAIJ,
                                       /*69*/ MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*75*/ NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPIAIJ,
                                       NULL,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*89*/ MatBindToCPU_MPIAIJ,
                                       MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       /*94*/ NULL,
                                       MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       /*99*/ NULL,
                                       NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       /*104*/ MatMissingDiagonal_MPIAIJ,
                                       MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       /*109*/ NULL,
                                       MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*114*/ MatGetMultiProcBlock_MPIAIJ,
                                       MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       /*119*/ MatCreateSubMatricesMPI_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*124*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       /*129*/ MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       NULL,
                                       MatEliminateZeros_MPIAIJ,
                                       MatGetRowSumAbs_MPIAIJ,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCopyHashToXAIJ_MPI_Hash};
2867 
2868 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2869 {
2870   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2871 
2872   PetscFunctionBegin;
2873   PetscCall(MatStoreValues(aij->A));
2874   PetscCall(MatStoreValues(aij->B));
2875   PetscFunctionReturn(PETSC_SUCCESS);
2876 }
2877 
2878 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2879 {
2880   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2881 
2882   PetscFunctionBegin;
2883   PetscCall(MatRetrieveValues(aij->A));
2884   PetscCall(MatRetrieveValues(aij->B));
2885   PetscFunctionReturn(PETSC_SUCCESS);
2886 }
2887 
2888 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2889 {
2890   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2891   PetscMPIInt size;
2892 
2893   PetscFunctionBegin;
2894   if (B->hash_active) {
2895     B->ops[0]      = b->cops;
2896     B->hash_active = PETSC_FALSE;
2897   }
2898   PetscCall(PetscLayoutSetUp(B->rmap));
2899   PetscCall(PetscLayoutSetUp(B->cmap));
2900 
2901 #if defined(PETSC_USE_CTABLE)
2902   PetscCall(PetscHMapIDestroy(&b->colmap));
2903 #else
2904   PetscCall(PetscFree(b->colmap));
2905 #endif
2906   PetscCall(PetscFree(b->garray));
2907   PetscCall(VecDestroy(&b->lvec));
2908   PetscCall(VecScatterDestroy(&b->Mvctx));
2909 
2910   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2911 
2912   MatSeqXAIJGetOptions_Private(b->B);
2913   PetscCall(MatDestroy(&b->B));
2914   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2915   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2916   PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
2917   PetscCall(MatSetType(b->B, MATSEQAIJ));
2918   MatSeqXAIJRestoreOptions_Private(b->B);
2919 
2920   MatSeqXAIJGetOptions_Private(b->A);
2921   PetscCall(MatDestroy(&b->A));
2922   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2923   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2924   PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
2925   PetscCall(MatSetType(b->A, MATSEQAIJ));
2926   MatSeqXAIJRestoreOptions_Private(b->A);
2927 
2928   PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
2929   PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
2930   B->preallocated  = PETSC_TRUE;
2931   B->was_assembled = PETSC_FALSE;
2932   B->assembled     = PETSC_FALSE;
2933   PetscFunctionReturn(PETSC_SUCCESS);
2934 }
2935 
2936 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2937 {
2938   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2939   PetscBool   ondiagreset, offdiagreset, memoryreset;
2940 
2941   PetscFunctionBegin;
2942   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2943   PetscCheck(B->insertmode == NOT_SET_VALUES, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot reset preallocation after setting some values but not yet calling MatAssemblyBegin()/MatAssemblyEnd()");
2944   if (B->num_ass == 0) PetscFunctionReturn(PETSC_SUCCESS);
2945 
2946   PetscCall(MatResetPreallocation_SeqAIJ_Private(b->A, &ondiagreset));
2947   PetscCall(MatResetPreallocation_SeqAIJ_Private(b->B, &offdiagreset));
2948   memoryreset = (PetscBool)(ondiagreset || offdiagreset);
2949   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &memoryreset, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)B)));
2950   if (!memoryreset) PetscFunctionReturn(PETSC_SUCCESS);
2951 
2952   PetscCall(PetscLayoutSetUp(B->rmap));
2953   PetscCall(PetscLayoutSetUp(B->cmap));
2954   PetscCheck(B->assembled || B->was_assembled, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONGSTATE, "Should not need to reset preallocation if the matrix was never assembled");
2955   PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_TRUE));
2956   PetscCall(VecScatterDestroy(&b->Mvctx));
2957 
2958   B->preallocated  = PETSC_TRUE;
2959   B->was_assembled = PETSC_FALSE;
2960   B->assembled     = PETSC_FALSE;
2961   /* Log that the state of this object has changed; this will help guarantee that preconditioners get re-setup */
2962   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2963   PetscFunctionReturn(PETSC_SUCCESS);
2964 }
2965 
2966 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2967 {
2968   Mat         mat;
2969   Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;
2970 
2971   PetscFunctionBegin;
2972   *newmat = NULL;
2973   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2974   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2975   PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
2976   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2977   a = (Mat_MPIAIJ *)mat->data;
2978 
2979   mat->factortype = matin->factortype;
2980   mat->assembled  = matin->assembled;
2981   mat->insertmode = NOT_SET_VALUES;
2982 
2983   a->size         = oldmat->size;
2984   a->rank         = oldmat->rank;
2985   a->donotstash   = oldmat->donotstash;
2986   a->roworiented  = oldmat->roworiented;
2987   a->rowindices   = NULL;
2988   a->rowvalues    = NULL;
2989   a->getrowactive = PETSC_FALSE;
2990 
2991   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
2992   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2993   if (matin->hash_active) {
2994     PetscCall(MatSetUp(mat));
2995   } else {
2996     mat->preallocated = matin->preallocated;
2997     if (oldmat->colmap) {
2998 #if defined(PETSC_USE_CTABLE)
2999       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3000 #else
3001       PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
3002       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
3003 #endif
3004     } else a->colmap = NULL;
3005     if (oldmat->garray) {
3006       PetscInt len;
3007       len = oldmat->B->cmap->n;
3008       PetscCall(PetscMalloc1(len + 1, &a->garray));
3009       if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3010     } else a->garray = NULL;
3011 
3012     /* It may happen MatDuplicate is called with a non-assembled matrix
3013       In fact, MatDuplicate only requires the matrix to be preallocated
3014       This may happen inside a DMCreateMatrix_Shell */
3015     if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3016     if (oldmat->Mvctx) {
3017       a->Mvctx = oldmat->Mvctx;
3018       PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx));
3019     }
3020     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3021     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3022   }
3023   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3024   *newmat = mat;
3025   PetscFunctionReturn(PETSC_SUCCESS);
3026 }
3027 
3028 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3029 {
3030   PetscBool isbinary, ishdf5;
3031 
3032   PetscFunctionBegin;
3033   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3034   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3035   /* force binary viewer to load .info file if it has not yet done so */
3036   PetscCall(PetscViewerSetUp(viewer));
3037   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3038   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3039   if (isbinary) {
3040     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3041   } else if (ishdf5) {
3042 #if defined(PETSC_HAVE_HDF5)
3043     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3044 #else
3045     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3046 #endif
3047   } else {
3048     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3049   }
3050   PetscFunctionReturn(PETSC_SUCCESS);
3051 }
3052 
3053 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3054 {
3055   PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
3056   PetscInt    *rowidxs, *colidxs;
3057   PetscScalar *matvals;
3058 
3059   PetscFunctionBegin;
3060   PetscCall(PetscViewerSetUp(viewer));
3061 
3062   /* read in matrix header */
3063   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3064   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3065   M  = header[1];
3066   N  = header[2];
3067   nz = header[3];
3068   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3069   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3070   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");
3071 
3072   /* set block sizes from the viewer's .info file */
3073   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3074   /* set global sizes if not set already */
3075   if (mat->rmap->N < 0) mat->rmap->N = M;
3076   if (mat->cmap->N < 0) mat->cmap->N = N;
3077   PetscCall(PetscLayoutSetUp(mat->rmap));
3078   PetscCall(PetscLayoutSetUp(mat->cmap));
3079 
3080   /* check if the matrix sizes are correct */
3081   PetscCall(MatGetSize(mat, &rows, &cols));
3082   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3083 
3084   /* read in row lengths and build row indices */
3085   PetscCall(MatGetLocalSize(mat, &m, NULL));
3086   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3087   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3088   rowidxs[0] = 0;
3089   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3090   if (nz != PETSC_INT_MAX) {
3091     PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3092     PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3093   }
3094 
3095   /* read in column indices and matrix values */
3096   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3097   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3098   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3099   /* store matrix indices and values */
3100   PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
3101   PetscCall(PetscFree(rowidxs));
3102   PetscCall(PetscFree2(colidxs, matvals));
3103   PetscFunctionReturn(PETSC_SUCCESS);
3104 }
3105 
3106 /* Not scalable because of ISAllGather() unless getting all columns. */
3107 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3108 {
3109   IS          iscol_local;
3110   PetscBool   isstride;
3111   PetscMPIInt gisstride = 0;
3112 
3113   PetscFunctionBegin;
3114   /* check if we are grabbing all columns*/
3115   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3116 
3117   if (isstride) {
3118     PetscInt start, len, mstart, mlen;
3119     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3120     PetscCall(ISGetLocalSize(iscol, &len));
3121     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3122     if (mstart == start && mlen - mstart == len) gisstride = 1;
3123   }
3124 
3125   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3126   if (gisstride) {
3127     PetscInt N;
3128     PetscCall(MatGetSize(mat, NULL, &N));
3129     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3130     PetscCall(ISSetIdentity(iscol_local));
3131     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3132   } else {
3133     PetscInt cbs;
3134     PetscCall(ISGetBlockSize(iscol, &cbs));
3135     PetscCall(ISAllGather(iscol, &iscol_local));
3136     PetscCall(ISSetBlockSize(iscol_local, cbs));
3137   }
3138 
3139   *isseq = iscol_local;
3140   PetscFunctionReturn(PETSC_SUCCESS);
3141 }
3142 
3143 /*
3144  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3145  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3146 
3147  Input Parameters:
3148 +   mat - matrix
3149 .   isrow - parallel row index set; its local indices are a subset of local columns of `mat`,
3150            i.e., mat->rstart <= isrow[i] < mat->rend
3151 -   iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
3152            i.e., mat->cstart <= iscol[i] < mat->cend
3153 
3154  Output Parameters:
3155 +   isrow_d - sequential row index set for retrieving mat->A
3156 .   iscol_d - sequential  column index set for retrieving mat->A
3157 .   iscol_o - sequential column index set for retrieving mat->B
3158 -   garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
3159  */
3160 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, PetscInt *garray[])
3161 {
3162   Vec             x, cmap;
3163   const PetscInt *is_idx;
3164   PetscScalar    *xarray, *cmaparray;
3165   PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
3166   Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
3167   Mat             B    = a->B;
3168   Vec             lvec = a->lvec, lcmap;
3169   PetscInt        i, cstart, cend, Bn = B->cmap->N;
3170   MPI_Comm        comm;
3171   VecScatter      Mvctx = a->Mvctx;
3172 
3173   PetscFunctionBegin;
3174   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3175   PetscCall(ISGetLocalSize(iscol, &ncols));
3176 
3177   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3178   PetscCall(MatCreateVecs(mat, &x, NULL));
3179   PetscCall(VecSet(x, -1.0));
3180   PetscCall(VecDuplicate(x, &cmap));
3181   PetscCall(VecSet(cmap, -1.0));
3182 
3183   /* Get start indices */
3184   PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
3185   isstart -= ncols;
3186   PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));
3187 
3188   PetscCall(ISGetIndices(iscol, &is_idx));
3189   PetscCall(VecGetArray(x, &xarray));
3190   PetscCall(VecGetArray(cmap, &cmaparray));
3191   PetscCall(PetscMalloc1(ncols, &idx));
3192   for (i = 0; i < ncols; i++) {
3193     xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
3194     cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
3195     idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
3196   }
3197   PetscCall(VecRestoreArray(x, &xarray));
3198   PetscCall(VecRestoreArray(cmap, &cmaparray));
3199   PetscCall(ISRestoreIndices(iscol, &is_idx));
3200 
3201   /* Get iscol_d */
3202   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
3203   PetscCall(ISGetBlockSize(iscol, &i));
3204   PetscCall(ISSetBlockSize(*iscol_d, i));
3205 
3206   /* Get isrow_d */
3207   PetscCall(ISGetLocalSize(isrow, &m));
3208   rstart = mat->rmap->rstart;
3209   PetscCall(PetscMalloc1(m, &idx));
3210   PetscCall(ISGetIndices(isrow, &is_idx));
3211   for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
3212   PetscCall(ISRestoreIndices(isrow, &is_idx));
3213 
3214   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
3215   PetscCall(ISGetBlockSize(isrow, &i));
3216   PetscCall(ISSetBlockSize(*isrow_d, i));
3217 
3218   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3219   PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3220   PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3221 
3222   PetscCall(VecDuplicate(lvec, &lcmap));
3223 
3224   PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3225   PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3226 
3227   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3228   /* off-process column indices */
3229   count = 0;
3230   PetscCall(PetscMalloc1(Bn, &idx));
3231   PetscCall(PetscMalloc1(Bn, &cmap1));
3232 
3233   PetscCall(VecGetArray(lvec, &xarray));
3234   PetscCall(VecGetArray(lcmap, &cmaparray));
3235   for (i = 0; i < Bn; i++) {
3236     if (PetscRealPart(xarray[i]) > -1.0) {
3237       idx[count]   = i;                                     /* local column index in off-diagonal part B */
3238       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
3239       count++;
3240     }
3241   }
3242   PetscCall(VecRestoreArray(lvec, &xarray));
3243   PetscCall(VecRestoreArray(lcmap, &cmaparray));
3244 
3245   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
3246   /* cannot ensure iscol_o has same blocksize as iscol! */
3247 
3248   PetscCall(PetscFree(idx));
3249   *garray = cmap1;
3250 
3251   PetscCall(VecDestroy(&x));
3252   PetscCall(VecDestroy(&cmap));
3253   PetscCall(VecDestroy(&lcmap));
3254   PetscFunctionReturn(PETSC_SUCCESS);
3255 }
3256 
3257 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3258 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
3259 {
3260   Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
3261   Mat         M = NULL;
3262   MPI_Comm    comm;
3263   IS          iscol_d, isrow_d, iscol_o;
3264   Mat         Asub = NULL, Bsub = NULL;
3265   PetscInt    n, count, M_size, N_size;
3266 
3267   PetscFunctionBegin;
3268   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3269 
3270   if (call == MAT_REUSE_MATRIX) {
3271     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3272     PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
3273     PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");
3274 
3275     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
3276     PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");
3277 
3278     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
3279     PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");
3280 
3281     /* Update diagonal and off-diagonal portions of submat */
3282     asub = (Mat_MPIAIJ *)(*submat)->data;
3283     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
3284     PetscCall(ISGetLocalSize(iscol_o, &n));
3285     if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
3286     PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
3287     PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));
3288 
3289   } else { /* call == MAT_INITIAL_MATRIX) */
3290     PetscInt *garray, *garray_compact;
3291     PetscInt  BsubN;
3292 
3293     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3294     PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));
3295 
3296     /* Create local submatrices Asub and Bsub */
3297     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
3298     PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));
3299 
3300     // Compact garray so its not of size Bn
3301     PetscCall(ISGetSize(iscol_o, &count));
3302     PetscCall(PetscMalloc1(count, &garray_compact));
3303     PetscCall(PetscArraycpy(garray_compact, garray, count));
3304 
3305     /* Create submatrix M */
3306     PetscCall(ISGetSize(isrow, &M_size));
3307     PetscCall(ISGetSize(iscol, &N_size));
3308     PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, M_size, N_size, Asub, Bsub, garray_compact, &M));
3309 
3310     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3311     asub = (Mat_MPIAIJ *)M->data;
3312 
3313     PetscCall(ISGetLocalSize(iscol_o, &BsubN));
3314     n = asub->B->cmap->N;
3315     if (BsubN > n) {
3316       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3317       const PetscInt *idx;
3318       PetscInt        i, j, *idx_new, *subgarray = asub->garray;
3319       PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));
3320 
3321       PetscCall(PetscMalloc1(n, &idx_new));
3322       j = 0;
3323       PetscCall(ISGetIndices(iscol_o, &idx));
3324       for (i = 0; i < n; i++) {
3325         if (j >= BsubN) break;
3326         while (subgarray[i] > garray[j]) j++;
3327 
3328         if (subgarray[i] == garray[j]) {
3329           idx_new[i] = idx[j++];
3330         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
3331       }
3332       PetscCall(ISRestoreIndices(iscol_o, &idx));
3333 
3334       PetscCall(ISDestroy(&iscol_o));
3335       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));
3336 
3337     } else if (BsubN < n) {
3338       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
3339     }
3340 
3341     PetscCall(PetscFree(garray));
3342     *submat = M;
3343 
3344     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3345     PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
3346     PetscCall(ISDestroy(&isrow_d));
3347 
3348     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
3349     PetscCall(ISDestroy(&iscol_d));
3350 
3351     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
3352     PetscCall(ISDestroy(&iscol_o));
3353   }
3354   PetscFunctionReturn(PETSC_SUCCESS);
3355 }
3356 
3357 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
3358 {
3359   IS        iscol_local = NULL, isrow_d;
3360   PetscInt  csize;
3361   PetscInt  n, i, j, start, end;
3362   PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
3363   MPI_Comm  comm;
3364 
3365   PetscFunctionBegin;
3366   /* If isrow has same processor distribution as mat,
3367      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3368   if (call == MAT_REUSE_MATRIX) {
3369     PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
3370     if (isrow_d) {
3371       sameRowDist  = PETSC_TRUE;
3372       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3373     } else {
3374       PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
3375       if (iscol_local) {
3376         sameRowDist  = PETSC_TRUE;
3377         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3378       }
3379     }
3380   } else {
3381     /* Check if isrow has same processor distribution as mat */
3382     sameDist[0] = PETSC_FALSE;
3383     PetscCall(ISGetLocalSize(isrow, &n));
3384     if (!n) {
3385       sameDist[0] = PETSC_TRUE;
3386     } else {
3387       PetscCall(ISGetMinMax(isrow, &i, &j));
3388       PetscCall(MatGetOwnershipRange(mat, &start, &end));
3389       if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
3390     }
3391 
3392     /* Check if iscol has same processor distribution as mat */
3393     sameDist[1] = PETSC_FALSE;
3394     PetscCall(ISGetLocalSize(iscol, &n));
3395     if (!n) {
3396       sameDist[1] = PETSC_TRUE;
3397     } else {
3398       PetscCall(ISGetMinMax(iscol, &i, &j));
3399       PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
3400       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3401     }
3402 
3403     PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3404     PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
3405     sameRowDist = tsameDist[0];
3406   }
3407 
3408   if (sameRowDist) {
3409     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3410       /* isrow and iscol have same processor distribution as mat */
3411       PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
3412       PetscFunctionReturn(PETSC_SUCCESS);
3413     } else { /* sameRowDist */
3414       /* isrow has same processor distribution as mat */
3415       if (call == MAT_INITIAL_MATRIX) {
3416         PetscBool sorted;
3417         PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3418         PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
3419         PetscCall(ISGetSize(iscol, &i));
3420         PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);
3421 
3422         PetscCall(ISSorted(iscol_local, &sorted));
3423         if (sorted) {
3424           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3425           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
3426           PetscFunctionReturn(PETSC_SUCCESS);
3427         }
3428       } else { /* call == MAT_REUSE_MATRIX */
3429         IS iscol_sub;
3430         PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3431         if (iscol_sub) {
3432           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
3433           PetscFunctionReturn(PETSC_SUCCESS);
3434         }
3435       }
3436     }
3437   }
3438 
3439   /* General case: iscol -> iscol_local which has global size of iscol */
3440   if (call == MAT_REUSE_MATRIX) {
3441     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
3442     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3443   } else {
3444     if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3445   }
3446 
3447   PetscCall(ISGetLocalSize(iscol, &csize));
3448   PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));
3449 
3450   if (call == MAT_INITIAL_MATRIX) {
3451     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3452     PetscCall(ISDestroy(&iscol_local));
3453   }
3454   PetscFunctionReturn(PETSC_SUCCESS);
3455 }
3456 
3457 /*@C
3458   MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3459   and "off-diagonal" part of the matrix in CSR format.
3460 
3461   Collective
3462 
3463   Input Parameters:
3464 + comm   - MPI communicator
3465 . M      - the global row size
3466 . N      - the global column size
3467 . A      - "diagonal" portion of matrix
3468 . B      - if garray is `NULL`, B should be the offdiag matrix using global col ids and of size N - if garray is not `NULL`, B should be the offdiag matrix using local col ids and of size garray
3469 - garray - either `NULL` or the global index of `B` columns
3470 
3471   Output Parameter:
3472 . mat - the matrix, with input `A` as its local diagonal matrix
3473 
3474   Level: advanced
3475 
3476   Notes:
3477   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3478 
3479   `A` and `B` becomes part of output mat. The user cannot use `A` and `B` anymore.
3480 
3481 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3482 @*/
3483 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, PetscInt M, PetscInt N, Mat A, Mat B, PetscInt *garray, Mat *mat)
3484 {
3485   PetscInt    m, n;
3486   MatType     mpi_mat_type;
3487   Mat_MPIAIJ *mpiaij;
3488   Mat         C;
3489 
3490   PetscFunctionBegin;
3491   PetscCall(MatCreate(comm, &C));
3492   PetscCall(MatGetSize(A, &m, &n));
3493   PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
3494   PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
3495 
3496   PetscCall(MatSetSizes(C, m, n, M, N));
3497   /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
3498   PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
3499   PetscCall(MatSetType(C, mpi_mat_type));
3500 
3501   PetscCall(MatSetBlockSizes(C, A->rmap->bs, A->cmap->bs));
3502   PetscCall(PetscLayoutSetUp(C->rmap));
3503   PetscCall(PetscLayoutSetUp(C->cmap));
3504 
3505   mpiaij              = (Mat_MPIAIJ *)C->data;
3506   mpiaij->A           = A;
3507   mpiaij->B           = B;
3508   mpiaij->garray      = garray;
3509   C->preallocated     = PETSC_TRUE;
3510   C->nooffprocentries = PETSC_TRUE; /* See MatAssemblyBegin_MPIAIJ. In effect, making MatAssemblyBegin a nop */
3511 
3512   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3513   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
3514   /* MatAssemblyEnd is critical here. It sets mat->offloadmask according to A and B's, and
3515    also gets mpiaij->B compacted (if garray is NULL), with its col ids and size reduced
3516    */
3517   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
3518   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
3519   PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3520   *mat = C;
3521   PetscFunctionReturn(PETSC_SUCCESS);
3522 }
3523 
3524 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3525 
3526 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
3527 {
3528   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
3529   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3530   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
3531   Mat             M, Msub, B = a->B;
3532   MatScalar      *aa;
3533   Mat_SeqAIJ     *aij;
3534   PetscInt       *garray = a->garray, *colsub, Ncols;
3535   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
3536   IS              iscol_sub, iscmap;
3537   const PetscInt *is_idx, *cmap;
3538   PetscBool       allcolumns = PETSC_FALSE;
3539   MPI_Comm        comm;
3540 
3541   PetscFunctionBegin;
3542   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3543   if (call == MAT_REUSE_MATRIX) {
3544     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3545     PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
3546     PetscCall(ISGetLocalSize(iscol_sub, &count));
3547 
3548     PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
3549     PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");
3550 
3551     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
3552     PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3553 
3554     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
3555 
3556   } else { /* call == MAT_INITIAL_MATRIX) */
3557     PetscBool flg;
3558 
3559     PetscCall(ISGetLocalSize(iscol, &n));
3560     PetscCall(ISGetSize(iscol, &Ncols));
3561 
3562     /* (1) iscol -> nonscalable iscol_local */
3563     /* Check for special case: each processor gets entire matrix columns */
3564     PetscCall(ISIdentity(iscol_local, &flg));
3565     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3566     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3567     if (allcolumns) {
3568       iscol_sub = iscol_local;
3569       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3570       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
3571 
3572     } else {
3573       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3574       PetscInt *idx, *cmap1, k;
3575       PetscCall(PetscMalloc1(Ncols, &idx));
3576       PetscCall(PetscMalloc1(Ncols, &cmap1));
3577       PetscCall(ISGetIndices(iscol_local, &is_idx));
3578       count = 0;
3579       k     = 0;
3580       for (i = 0; i < Ncols; i++) {
3581         j = is_idx[i];
3582         if (j >= cstart && j < cend) {
3583           /* diagonal part of mat */
3584           idx[count]     = j;
3585           cmap1[count++] = i; /* column index in submat */
3586         } else if (Bn) {
3587           /* off-diagonal part of mat */
3588           if (j == garray[k]) {
3589             idx[count]     = j;
3590             cmap1[count++] = i; /* column index in submat */
3591           } else if (j > garray[k]) {
3592             while (j > garray[k] && k < Bn - 1) k++;
3593             if (j == garray[k]) {
3594               idx[count]     = j;
3595               cmap1[count++] = i; /* column index in submat */
3596             }
3597           }
3598         }
3599       }
3600       PetscCall(ISRestoreIndices(iscol_local, &is_idx));
3601 
3602       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
3603       PetscCall(ISGetBlockSize(iscol, &cbs));
3604       PetscCall(ISSetBlockSize(iscol_sub, cbs));
3605 
3606       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
3607     }
3608 
3609     /* (3) Create sequential Msub */
3610     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
3611   }
3612 
3613   PetscCall(ISGetLocalSize(iscol_sub, &count));
3614   aij = (Mat_SeqAIJ *)Msub->data;
3615   ii  = aij->i;
3616   PetscCall(ISGetIndices(iscmap, &cmap));
3617 
3618   /*
3619       m - number of local rows
3620       Ncols - number of columns (same on all processors)
3621       rstart - first row in new global matrix generated
3622   */
3623   PetscCall(MatGetSize(Msub, &m, NULL));
3624 
3625   if (call == MAT_INITIAL_MATRIX) {
3626     /* (4) Create parallel newmat */
3627     PetscMPIInt rank, size;
3628     PetscInt    csize;
3629 
3630     PetscCallMPI(MPI_Comm_size(comm, &size));
3631     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3632 
3633     /*
3634         Determine the number of non-zeros in the diagonal and off-diagonal
3635         portions of the matrix in order to do correct preallocation
3636     */
3637 
3638     /* first get start and end of "diagonal" columns */
3639     PetscCall(ISGetLocalSize(iscol, &csize));
3640     if (csize == PETSC_DECIDE) {
3641       PetscCall(ISGetSize(isrow, &mglobal));
3642       if (mglobal == Ncols) { /* square matrix */
3643         nlocal = m;
3644       } else {
3645         nlocal = Ncols / size + ((Ncols % size) > rank);
3646       }
3647     } else {
3648       nlocal = csize;
3649     }
3650     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3651     rstart = rend - nlocal;
3652     PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);
3653 
3654     /* next, compute all the lengths */
3655     jj = aij->j;
3656     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3657     olens = dlens + m;
3658     for (i = 0; i < m; i++) {
3659       jend = ii[i + 1] - ii[i];
3660       olen = 0;
3661       dlen = 0;
3662       for (j = 0; j < jend; j++) {
3663         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3664         else dlen++;
3665         jj++;
3666       }
3667       olens[i] = olen;
3668       dlens[i] = dlen;
3669     }
3670 
3671     PetscCall(ISGetBlockSize(isrow, &bs));
3672     PetscCall(ISGetBlockSize(iscol, &cbs));
3673 
3674     PetscCall(MatCreate(comm, &M));
3675     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
3676     PetscCall(MatSetBlockSizes(M, bs, cbs));
3677     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3678     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3679     PetscCall(PetscFree(dlens));
3680 
3681   } else { /* call == MAT_REUSE_MATRIX */
3682     M = *newmat;
3683     PetscCall(MatGetLocalSize(M, &i, NULL));
3684     PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3685     PetscCall(MatZeroEntries(M));
3686     /*
3687          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3688        rather than the slower MatSetValues().
3689     */
3690     M->was_assembled = PETSC_TRUE;
3691     M->assembled     = PETSC_FALSE;
3692   }
3693 
3694   /* (5) Set values of Msub to *newmat */
3695   PetscCall(PetscMalloc1(count, &colsub));
3696   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
3697 
3698   jj = aij->j;
3699   PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
3700   for (i = 0; i < m; i++) {
3701     row = rstart + i;
3702     nz  = ii[i + 1] - ii[i];
3703     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
3704     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
3705     jj += nz;
3706     aa += nz;
3707   }
3708   PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
3709   PetscCall(ISRestoreIndices(iscmap, &cmap));
3710 
3711   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3712   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3713 
3714   PetscCall(PetscFree(colsub));
3715 
3716   /* save Msub, iscol_sub and iscmap used in processor for next request */
3717   if (call == MAT_INITIAL_MATRIX) {
3718     *newmat = M;
3719     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub));
3720     PetscCall(MatDestroy(&Msub));
3721 
3722     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub));
3723     PetscCall(ISDestroy(&iscol_sub));
3724 
3725     PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap));
3726     PetscCall(ISDestroy(&iscmap));
3727 
3728     if (iscol_local) {
3729       PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3730       PetscCall(ISDestroy(&iscol_local));
3731     }
3732   }
3733   PetscFunctionReturn(PETSC_SUCCESS);
3734 }
3735 
3736 /*
3737     Not great since it makes two copies of the submatrix, first an SeqAIJ
3738   in local and then by concatenating the local matrices the end result.
3739   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3740 
3741   This requires a sequential iscol with all indices.
3742 */
3743 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
3744 {
3745   PetscMPIInt rank, size;
3746   PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
3747   PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3748   Mat         M, Mreuse;
3749   MatScalar  *aa, *vwork;
3750   MPI_Comm    comm;
3751   Mat_SeqAIJ *aij;
3752   PetscBool   colflag, allcolumns = PETSC_FALSE;
3753 
3754   PetscFunctionBegin;
3755   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3756   PetscCallMPI(MPI_Comm_rank(comm, &rank));
3757   PetscCallMPI(MPI_Comm_size(comm, &size));
3758 
3759   /* Check for special case: each processor gets entire matrix columns */
3760   PetscCall(ISIdentity(iscol, &colflag));
3761   PetscCall(ISGetLocalSize(iscol, &n));
3762   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3763   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3764 
3765   if (call == MAT_REUSE_MATRIX) {
3766     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
3767     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3768     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
3769   } else {
3770     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
3771   }
3772 
3773   /*
3774       m - number of local rows
3775       n - number of columns (same on all processors)
3776       rstart - first row in new global matrix generated
3777   */
3778   PetscCall(MatGetSize(Mreuse, &m, &n));
3779   PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
3780   if (call == MAT_INITIAL_MATRIX) {
3781     aij = (Mat_SeqAIJ *)Mreuse->data;
3782     ii  = aij->i;
3783     jj  = aij->j;
3784 
3785     /*
3786         Determine the number of non-zeros in the diagonal and off-diagonal
3787         portions of the matrix in order to do correct preallocation
3788     */
3789 
3790     /* first get start and end of "diagonal" columns */
3791     if (csize == PETSC_DECIDE) {
3792       PetscCall(ISGetSize(isrow, &mglobal));
3793       if (mglobal == n) { /* square matrix */
3794         nlocal = m;
3795       } else {
3796         nlocal = n / size + ((n % size) > rank);
3797       }
3798     } else {
3799       nlocal = csize;
3800     }
3801     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3802     rstart = rend - nlocal;
3803     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
3804 
3805     /* next, compute all the lengths */
3806     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3807     olens = dlens + m;
3808     for (i = 0; i < m; i++) {
3809       jend = ii[i + 1] - ii[i];
3810       olen = 0;
3811       dlen = 0;
3812       for (j = 0; j < jend; j++) {
3813         if (*jj < rstart || *jj >= rend) olen++;
3814         else dlen++;
3815         jj++;
3816       }
3817       olens[i] = olen;
3818       dlens[i] = dlen;
3819     }
3820     PetscCall(MatCreate(comm, &M));
3821     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
3822     PetscCall(MatSetBlockSizes(M, bs, cbs));
3823     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3824     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3825     PetscCall(PetscFree(dlens));
3826   } else {
3827     PetscInt ml, nl;
3828 
3829     M = *newmat;
3830     PetscCall(MatGetLocalSize(M, &ml, &nl));
3831     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3832     PetscCall(MatZeroEntries(M));
3833     /*
3834          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3835        rather than the slower MatSetValues().
3836     */
3837     M->was_assembled = PETSC_TRUE;
3838     M->assembled     = PETSC_FALSE;
3839   }
3840   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
3841   aij = (Mat_SeqAIJ *)Mreuse->data;
3842   ii  = aij->i;
3843   jj  = aij->j;
3844 
3845   /* trigger copy to CPU if needed */
3846   PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
3847   for (i = 0; i < m; i++) {
3848     row   = rstart + i;
3849     nz    = ii[i + 1] - ii[i];
3850     cwork = jj;
3851     jj    = PetscSafePointerPlusOffset(jj, nz);
3852     vwork = aa;
3853     aa    = PetscSafePointerPlusOffset(aa, nz);
3854     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
3855   }
3856   PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));
3857 
3858   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3859   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3860   *newmat = M;
3861 
3862   /* save submatrix used in processor for next request */
3863   if (call == MAT_INITIAL_MATRIX) {
3864     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
3865     PetscCall(MatDestroy(&Mreuse));
3866   }
3867   PetscFunctionReturn(PETSC_SUCCESS);
3868 }
3869 
3870 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3871 {
3872   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3873   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart;
3874   const PetscInt *JJ;
3875   PetscBool       nooffprocentries;
3876   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3877 
3878   PetscFunctionBegin;
3879   PetscCall(PetscLayoutSetUp(B->rmap));
3880   PetscCall(PetscLayoutSetUp(B->cmap));
3881   m       = B->rmap->n;
3882   cstart  = B->cmap->rstart;
3883   cend    = B->cmap->rend;
3884   rstart  = B->rmap->rstart;
3885   irstart = Ii[0];
3886 
3887   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3888 
3889   if (PetscDefined(USE_DEBUG)) {
3890     for (i = 0; i < m; i++) {
3891       nnz = Ii[i + 1] - Ii[i];
3892       JJ  = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
3893       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3894       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3895       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3896     }
3897   }
3898 
3899   for (i = 0; i < m; i++) {
3900     nnz     = Ii[i + 1] - Ii[i];
3901     JJ      = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
3902     nnz_max = PetscMax(nnz_max, nnz);
3903     d       = 0;
3904     for (j = 0; j < nnz; j++) {
3905       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3906     }
3907     d_nnz[i] = d;
3908     o_nnz[i] = nnz - d;
3909   }
3910   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3911   PetscCall(PetscFree2(d_nnz, o_nnz));
3912 
3913   for (i = 0; i < m; i++) {
3914     ii = i + rstart;
3915     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES));
3916   }
3917   nooffprocentries    = B->nooffprocentries;
3918   B->nooffprocentries = PETSC_TRUE;
3919   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3920   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3921   B->nooffprocentries = nooffprocentries;
3922 
3923   /* count number of entries below block diagonal */
3924   PetscCall(PetscFree(Aij->ld));
3925   PetscCall(PetscCalloc1(m, &ld));
3926   Aij->ld = ld;
3927   for (i = 0; i < m; i++) {
3928     nnz = Ii[i + 1] - Ii[i];
3929     j   = 0;
3930     while (j < nnz && J[j] < cstart) j++;
3931     ld[i] = j;
3932     if (J) J += nnz;
3933   }
3934 
3935   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3936   PetscFunctionReturn(PETSC_SUCCESS);
3937 }
3938 
3939 /*@
3940   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3941   (the default parallel PETSc format).
3942 
3943   Collective
3944 
3945   Input Parameters:
3946 + B - the matrix
3947 . i - the indices into `j` for the start of each local row (indices start with zero)
3948 . j - the column indices for each local row (indices start with zero)
3949 - v - optional values in the matrix
3950 
3951   Level: developer
3952 
3953   Notes:
3954   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
3955   thus you CANNOT change the matrix entries by changing the values of `v` after you have
3956   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3957 
3958   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
3959 
3960   A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.
3961 
3962   You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.
3963 
3964   If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
3965   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
3966 
3967   The format which is used for the sparse matrix input, is equivalent to a
3968   row-major ordering, i.e., for the following matrix, the input data expected is
3969   as shown
3970 .vb
3971         1 0 0
3972         2 0 3     P0
3973        -------
3974         4 5 6     P1
3975 
3976      Process0 [P0] rows_owned=[0,1]
3977         i =  {0,1,3}  [size = nrow+1  = 2+1]
3978         j =  {0,0,2}  [size = 3]
3979         v =  {1,2,3}  [size = 3]
3980 
3981      Process1 [P1] rows_owned=[2]
3982         i =  {0,3}    [size = nrow+1  = 1+1]
3983         j =  {0,1,2}  [size = 3]
3984         v =  {4,5,6}  [size = 3]
3985 .ve
3986 
3987 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
3988           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
3989 @*/
3990 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
3991 {
3992   PetscFunctionBegin;
3993   PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
3994   PetscFunctionReturn(PETSC_SUCCESS);
3995 }
3996 
3997 /*@
3998   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
3999   (the default parallel PETSc format).  For good matrix assembly performance
4000   the user should preallocate the matrix storage by setting the parameters
4001   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4002 
4003   Collective
4004 
4005   Input Parameters:
4006 + B     - the matrix
4007 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4008            (same value is used for all local rows)
4009 . d_nnz - array containing the number of nonzeros in the various rows of the
4010            DIAGONAL portion of the local submatrix (possibly different for each row)
4011            or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4012            The size of this array is equal to the number of local rows, i.e 'm'.
4013            For matrices that will be factored, you must leave room for (and set)
4014            the diagonal entry even if it is zero.
4015 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4016            submatrix (same value is used for all local rows).
4017 - o_nnz - array containing the number of nonzeros in the various rows of the
4018            OFF-DIAGONAL portion of the local submatrix (possibly different for
4019            each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4020            structure. The size of this array is equal to the number
4021            of local rows, i.e 'm'.
4022 
4023   Example Usage:
4024   Consider the following 8x8 matrix with 34 non-zero values, that is
4025   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4026   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4027   as follows
4028 
4029 .vb
4030             1  2  0  |  0  3  0  |  0  4
4031     Proc0   0  5  6  |  7  0  0  |  8  0
4032             9  0 10  | 11  0  0  | 12  0
4033     -------------------------------------
4034            13  0 14  | 15 16 17  |  0  0
4035     Proc1   0 18  0  | 19 20 21  |  0  0
4036             0  0  0  | 22 23  0  | 24  0
4037     -------------------------------------
4038     Proc2  25 26 27  |  0  0 28  | 29  0
4039            30  0  0  | 31 32 33  |  0 34
4040 .ve
4041 
4042   This can be represented as a collection of submatrices as
4043 .vb
4044       A B C
4045       D E F
4046       G H I
4047 .ve
4048 
4049   Where the submatrices A,B,C are owned by proc0, D,E,F are
4050   owned by proc1, G,H,I are owned by proc2.
4051 
4052   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4053   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4054   The 'M','N' parameters are 8,8, and have the same values on all procs.
4055 
4056   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4057   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4058   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4059   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4060   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4061   matrix, and [DF] as another `MATSEQAIJ` matrix.
4062 
4063   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4064   allocated for every row of the local DIAGONAL submatrix, and `o_nz`
4065   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4066   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
4067   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4068   In this case, the values of `d_nz`, `o_nz` are
4069 .vb
4070      proc0  d_nz = 2, o_nz = 2
4071      proc1  d_nz = 3, o_nz = 2
4072      proc2  d_nz = 1, o_nz = 4
4073 .ve
4074   We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
4075   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4076   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4077   34 values.
4078 
4079   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4080   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4081   In the above case the values for `d_nnz`, `o_nnz` are
4082 .vb
4083      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4084      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4085      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4086 .ve
4087   Here the space allocated is sum of all the above values i.e 34, and
4088   hence pre-allocation is perfect.
4089 
4090   Level: intermediate
4091 
4092   Notes:
4093   If the *_nnz parameter is given then the *_nz parameter is ignored
4094 
4095   The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4096   storage.  The stored row and column indices begin with zero.
4097   See [Sparse Matrices](sec_matsparse) for details.
4098 
4099   The parallel matrix is partitioned such that the first m0 rows belong to
4100   process 0, the next m1 rows belong to process 1, the next m2 rows belong
4101   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4102 
4103   The DIAGONAL portion of the local submatrix of a processor can be defined
4104   as the submatrix which is obtained by extracting the part corresponding to
4105   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4106   first row that belongs to the processor, r2 is the last row belonging to
4107   this processor, and c1-c2 is the range of indices of the local part of a
4108   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4109   common case of a square matrix, the row and column ranges are the same and
4110   the DIAGONAL part is also square. The remaining portion of the local
4111   submatrix (mxN) constitute the OFF-DIAGONAL portion.
4112 
4113   If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4114 
4115   You can call `MatGetInfo()` to get information on how effective the preallocation was;
4116   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4117   You can also run with the option `-info` and look for messages with the string
4118   malloc in them to see if additional memory allocation was needed.
4119 
4120 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4121           `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4122 @*/
4123 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
4124 {
4125   PetscFunctionBegin;
4126   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
4127   PetscValidType(B, 1);
4128   PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
4129   PetscFunctionReturn(PETSC_SUCCESS);
4130 }
4131 
4132 /*@
4133   MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows
4134   in standard CSR format.
4135 
4136   Collective
4137 
4138   Input Parameters:
4139 + comm - MPI communicator
4140 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
4141 . n    - This value should be the same as the local size used in creating the
4142          x vector for the matrix-vector product $ y = Ax$. (or `PETSC_DECIDE` to have
4143          calculated if `N` is given) For square matrices n is almost always `m`.
4144 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
4145 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
4146 . i    - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4147 . j    - global column indices
4148 - a    - optional matrix values
4149 
4150   Output Parameter:
4151 . mat - the matrix
4152 
4153   Level: intermediate
4154 
4155   Notes:
4156   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4157   thus you CANNOT change the matrix entries by changing the values of `a[]` after you have
4158   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4159 
4160   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
4161 
4162   Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()`
4163 
4164   If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use
4165   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
4166 
4167   The format which is used for the sparse matrix input, is equivalent to a
4168   row-major ordering, i.e., for the following matrix, the input data expected is
4169   as shown
4170 .vb
4171         1 0 0
4172         2 0 3     P0
4173        -------
4174         4 5 6     P1
4175 
4176      Process0 [P0] rows_owned=[0,1]
4177         i =  {0,1,3}  [size = nrow+1  = 2+1]
4178         j =  {0,0,2}  [size = 3]
4179         v =  {1,2,3}  [size = 3]
4180 
4181      Process1 [P1] rows_owned=[2]
4182         i =  {0,3}    [size = nrow+1  = 1+1]
4183         j =  {0,1,2}  [size = 3]
4184         v =  {4,5,6}  [size = 3]
4185 .ve
4186 
4187 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4188           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4189 @*/
4190 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
4191 {
4192   PetscFunctionBegin;
4193   PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4194   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4195   PetscCall(MatCreate(comm, mat));
4196   PetscCall(MatSetSizes(*mat, m, n, M, N));
4197   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4198   PetscCall(MatSetType(*mat, MATMPIAIJ));
4199   PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
4200   PetscFunctionReturn(PETSC_SUCCESS);
4201 }
4202 
4203 /*@
4204   MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
4205   CSR format. Only the numerical values are updated; the other arrays must be identical to what was passed
4206   to `MatCreateMPIAIJWithArrays()`
4207 
4208   Deprecated: Use `MatUpdateMPIAIJWithArray()`
4209 
4210   Collective
4211 
4212   Input Parameters:
4213 + mat - the matrix
4214 . m   - number of local rows (Cannot be `PETSC_DECIDE`)
4215 . n   - This value should be the same as the local size used in creating the
4216        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4217        calculated if N is given) For square matrices n is almost always m.
4218 . M   - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4219 . N   - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4220 . Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4221 . J   - column indices
4222 - v   - matrix values
4223 
4224   Level: deprecated
4225 
4226 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4227           `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4228 @*/
4229 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
4230 {
4231   PetscInt        nnz, i;
4232   PetscBool       nooffprocentries;
4233   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4234   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4235   PetscScalar    *ad, *ao;
4236   PetscInt        ldi, Iii, md;
4237   const PetscInt *Adi = Ad->i;
4238   PetscInt       *ld  = Aij->ld;
4239 
4240   PetscFunctionBegin;
4241   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4242   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4243   PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4244   PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4245 
4246   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4247   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4248 
4249   for (i = 0; i < m; i++) {
4250     if (PetscDefined(USE_DEBUG)) {
4251       for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) {
4252         PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i);
4253         PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i);
4254       }
4255     }
4256     nnz = Ii[i + 1] - Ii[i];
4257     Iii = Ii[i];
4258     ldi = ld[i];
4259     md  = Adi[i + 1] - Adi[i];
4260     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4261     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4262     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4263     ad += md;
4264     ao += nnz - md;
4265   }
4266   nooffprocentries      = mat->nooffprocentries;
4267   mat->nooffprocentries = PETSC_TRUE;
4268   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4269   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4270   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4271   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4272   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4273   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4274   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4275   mat->nooffprocentries = nooffprocentries;
4276   PetscFunctionReturn(PETSC_SUCCESS);
4277 }
4278 
4279 /*@
4280   MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values
4281 
4282   Collective
4283 
4284   Input Parameters:
4285 + mat - the matrix
4286 - v   - matrix values, stored by row
4287 
4288   Level: intermediate
4289 
4290   Notes:
4291   The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4292 
4293   The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly
4294 
4295 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4296           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4297 @*/
4298 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4299 {
4300   PetscInt        nnz, i, m;
4301   PetscBool       nooffprocentries;
4302   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4303   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4304   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4305   PetscScalar    *ad, *ao;
4306   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4307   PetscInt        ldi, Iii, md;
4308   PetscInt       *ld = Aij->ld;
4309 
4310   PetscFunctionBegin;
4311   m = mat->rmap->n;
4312 
4313   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4314   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4315   Iii = 0;
4316   for (i = 0; i < m; i++) {
4317     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4318     ldi = ld[i];
4319     md  = Adi[i + 1] - Adi[i];
4320     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4321     ad += md;
4322     if (ao) {
4323       PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4324       PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4325       ao += nnz - md;
4326     }
4327     Iii += nnz;
4328   }
4329   nooffprocentries      = mat->nooffprocentries;
4330   mat->nooffprocentries = PETSC_TRUE;
4331   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4332   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4333   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4334   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4335   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4336   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4337   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4338   mat->nooffprocentries = nooffprocentries;
4339   PetscFunctionReturn(PETSC_SUCCESS);
4340 }
4341 
4342 /*@
4343   MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4344   (the default parallel PETSc format).  For good matrix assembly performance
4345   the user should preallocate the matrix storage by setting the parameters
4346   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4347 
4348   Collective
4349 
4350   Input Parameters:
4351 + comm  - MPI communicator
4352 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4353           This value should be the same as the local size used in creating the
4354           y vector for the matrix-vector product y = Ax.
4355 . n     - This value should be the same as the local size used in creating the
4356           x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4357           calculated if N is given) For square matrices n is almost always m.
4358 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4359 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4360 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4361           (same value is used for all local rows)
4362 . d_nnz - array containing the number of nonzeros in the various rows of the
4363           DIAGONAL portion of the local submatrix (possibly different for each row)
4364           or `NULL`, if `d_nz` is used to specify the nonzero structure.
4365           The size of this array is equal to the number of local rows, i.e 'm'.
4366 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4367           submatrix (same value is used for all local rows).
4368 - o_nnz - array containing the number of nonzeros in the various rows of the
4369           OFF-DIAGONAL portion of the local submatrix (possibly different for
4370           each row) or `NULL`, if `o_nz` is used to specify the nonzero
4371           structure. The size of this array is equal to the number
4372           of local rows, i.e 'm'.
4373 
4374   Output Parameter:
4375 . A - the matrix
4376 
4377   Options Database Keys:
4378 + -mat_no_inode                     - Do not use inodes
4379 . -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
4380 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4381                                       See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter`
4382                                       to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4383 
4384   Level: intermediate
4385 
4386   Notes:
4387   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4388   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4389   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4390 
4391   If the *_nnz parameter is given then the *_nz parameter is ignored
4392 
4393   The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4394   processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4395   storage requirements for this matrix.
4396 
  If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
  processor then it must be used on all processors that share the object for
  that argument.
4400 
4401   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
4402   `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
4403 
4404   The user MUST specify either the local or global matrix dimensions
4405   (possibly both).
4406 
4407   The parallel matrix is partitioned across processors such that the
4408   first `m0` rows belong to process 0, the next `m1` rows belong to
4409   process 1, the next `m2` rows belong to process 2, etc., where
4410   `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores
4411   values corresponding to [m x N] submatrix.
4412 
4413   The columns are logically partitioned with the n0 columns belonging
4414   to 0th partition, the next n1 columns belonging to the next
4415   partition etc.. where n0,n1,n2... are the input parameter 'n'.
4416 
4417   The DIAGONAL portion of the local submatrix on any given processor
4418   is the submatrix corresponding to the rows and columns m,n
4419   corresponding to the given processor. i.e diagonal matrix on
4420   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4421   etc. The remaining portion of the local submatrix [m x (N-n)]
4422   constitute the OFF-DIAGONAL portion. The example below better
4423   illustrates this concept. The two matrices, the DIAGONAL portion and
4424   the OFF-DIAGONAL portion are each stored as `MATSEQAIJ` matrices.
4425 
4426   For a square global matrix we define each processor's diagonal portion
4427   to be its local rows and the corresponding columns (a square submatrix);
4428   each processor's off-diagonal portion encompasses the remainder of the
4429   local matrix (a rectangular submatrix).
4430 
4431   If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
4432 
4433   When calling this routine with a single process communicator, a matrix of
4434   type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4435   type of communicator, use the construction mechanism
4436 .vb
4437   MatCreate(..., &A);
4438   MatSetType(A, MATMPIAIJ);
4439   MatSetSizes(A, m, n, M, N);
4440   MatMPIAIJSetPreallocation(A, ...);
4441 .ve
4442 
4443   By default, this format uses inodes (identical nodes) when possible.
4444   We search for consecutive rows with the same nonzero structure, thereby
4445   reusing matrix information to achieve increased efficiency.
4446 
4447   Example Usage:
  Consider the following 8x8 matrix with 34 non-zero values, that is
  assembled across 3 processors. Let's assume that proc0 owns 3 rows,
  proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
  as follows
4452 
4453 .vb
4454             1  2  0  |  0  3  0  |  0  4
4455     Proc0   0  5  6  |  7  0  0  |  8  0
4456             9  0 10  | 11  0  0  | 12  0
4457     -------------------------------------
4458            13  0 14  | 15 16 17  |  0  0
4459     Proc1   0 18  0  | 19 20 21  |  0  0
4460             0  0  0  | 22 23  0  | 24  0
4461     -------------------------------------
4462     Proc2  25 26 27  |  0  0 28  | 29  0
4463            30  0  0  | 31 32 33  |  0 34
4464 .ve
4465 
4466   This can be represented as a collection of submatrices as
4467 
4468 .vb
4469       A B C
4470       D E F
4471       G H I
4472 .ve
4473 
4474   Where the submatrices A,B,C are owned by proc0, D,E,F are
4475   owned by proc1, G,H,I are owned by proc2.
4476 
4477   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4478   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4479   The 'M','N' parameters are 8,8, and have the same values on all procs.
4480 
4481   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4482   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4483   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4484   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4485   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4486   matrix, and [DF] as another SeqAIJ matrix.
4487 
4488   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4489   allocated for every row of the local DIAGONAL submatrix, and `o_nz`
4490   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4491   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
4492   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4493   In this case, the values of `d_nz`,`o_nz` are
4494 .vb
     proc0  d_nz = 2, o_nz = 2
     proc1  d_nz = 3, o_nz = 2
     proc2  d_nz = 1, o_nz = 4
4498 .ve
  We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2. i.e we are using 12+15+10=37 storage locations to store
  34 values.
4503 
4504   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4505   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4506   In the above case the values for d_nnz,o_nnz are
4507 .vb
4508      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4509      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4510      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4511 .ve
4512   Here the space allocated is sum of all the above values i.e 34, and
4513   hence pre-allocation is perfect.
4514 
4515 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4516           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`,
4517           `MatGetOwnershipRangesColumn()`, `PetscLayout`
4518 @*/
4519 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4520 {
4521   PetscMPIInt size;
4522 
4523   PetscFunctionBegin;
4524   PetscCall(MatCreate(comm, A));
4525   PetscCall(MatSetSizes(*A, m, n, M, N));
4526   PetscCallMPI(MPI_Comm_size(comm, &size));
4527   if (size > 1) {
4528     PetscCall(MatSetType(*A, MATMPIAIJ));
4529     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4530   } else {
4531     PetscCall(MatSetType(*A, MATSEQAIJ));
4532     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4533   }
4534   PetscFunctionReturn(PETSC_SUCCESS);
4535 }
4536 
4537 /*@C
4538   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4539 
4540   Not Collective
4541 
4542   Input Parameter:
4543 . A - The `MATMPIAIJ` matrix
4544 
4545   Output Parameters:
4546 + Ad     - The local diagonal block as a `MATSEQAIJ` matrix
4547 . Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
4548 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4549 
4550   Level: intermediate
4551 
4552   Note:
  The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns of `Ao` are in [0, Nco), where Nco is
  the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
  local column numbers to global column numbers in the original matrix.
4557 
4558 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
4559 @*/
4560 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4561 {
4562   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4563   PetscBool   flg;
4564 
4565   PetscFunctionBegin;
4566   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4567   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4568   if (Ad) *Ad = a->A;
4569   if (Ao) *Ao = a->B;
4570   if (colmap) *colmap = a->garray;
4571   PetscFunctionReturn(PETSC_SUCCESS);
4572 }
4573 
4574 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
4575 {
4576   PetscInt     m, N, i, rstart, nnz, Ii;
4577   PetscInt    *indx;
4578   PetscScalar *values;
4579   MatType      rootType;
4580 
4581   PetscFunctionBegin;
4582   PetscCall(MatGetSize(inmat, &m, &N));
4583   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4584     PetscInt *dnz, *onz, sum, bs, cbs;
4585 
4586     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
4587     /* Check sum(n) = N */
4588     PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
4589     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
4590 
4591     PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
4592     rstart -= m;
4593 
4594     MatPreallocateBegin(comm, m, n, dnz, onz);
4595     for (i = 0; i < m; i++) {
4596       PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4597       PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
4598       PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4599     }
4600 
4601     PetscCall(MatCreate(comm, outmat));
4602     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
4603     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
4604     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
4605     PetscCall(MatGetRootType_Private(inmat, &rootType));
4606     PetscCall(MatSetType(*outmat, rootType));
4607     PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
4608     PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
4609     MatPreallocateEnd(dnz, onz);
4610     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
4611   }
4612 
4613   /* numeric phase */
4614   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
4615   for (i = 0; i < m; i++) {
4616     PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4617     Ii = i + rstart;
4618     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
4619     PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4620   }
4621   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
4622   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
4623   PetscFunctionReturn(PETSC_SUCCESS);
4624 }
4625 
4626 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void **data)
4627 {
4628   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)*data;
4629 
4630   PetscFunctionBegin;
4631   if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
4632   PetscCall(PetscFree(merge->id_r));
4633   PetscCall(PetscFree(merge->len_s));
4634   PetscCall(PetscFree(merge->len_r));
4635   PetscCall(PetscFree(merge->bi));
4636   PetscCall(PetscFree(merge->bj));
4637   PetscCall(PetscFree(merge->buf_ri[0]));
4638   PetscCall(PetscFree(merge->buf_ri));
4639   PetscCall(PetscFree(merge->buf_rj[0]));
4640   PetscCall(PetscFree(merge->buf_rj));
4641   PetscCall(PetscFree(merge->coi));
4642   PetscCall(PetscFree(merge->coj));
4643   PetscCall(PetscFree(merge->owners_co));
4644   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4645   PetscCall(PetscFree(merge));
4646   PetscFunctionReturn(PETSC_SUCCESS);
4647 }
4648 
4649 #include <../src/mat/utils/freespace.h>
4650 #include <petscbt.h>
4651 
4652 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
4653 {
4654   MPI_Comm             comm;
4655   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4656   PetscMPIInt          size, rank, taga, *len_s;
4657   PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m;
4658   PetscMPIInt          proc, k;
4659   PetscInt           **buf_ri, **buf_rj;
4660   PetscInt             anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
4661   PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
4662   MPI_Request         *s_waits, *r_waits;
4663   MPI_Status          *status;
4664   const MatScalar     *aa, *a_a;
4665   MatScalar          **abuf_r, *ba_i;
4666   Mat_Merge_SeqsToMPI *merge;
4667   PetscContainer       container;
4668 
4669   PetscFunctionBegin;
4670   PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
4671   PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));
4672 
4673   PetscCallMPI(MPI_Comm_size(comm, &size));
4674   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4675 
4676   PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
4677   PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4678   PetscCall(PetscContainerGetPointer(container, (void **)&merge));
4679   PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
4680   aa = a_a;
4681 
4682   bi     = merge->bi;
4683   bj     = merge->bj;
4684   buf_ri = merge->buf_ri;
4685   buf_rj = merge->buf_rj;
4686 
4687   PetscCall(PetscMalloc1(size, &status));
4688   owners = merge->rowmap->range;
4689   len_s  = merge->len_s;
4690 
4691   /* send and recv matrix values */
4692   PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
4693   PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));
4694 
4695   PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
4696   for (proc = 0, k = 0; proc < size; proc++) {
4697     if (!len_s[proc]) continue;
4698     i = owners[proc];
4699     PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
4700     k++;
4701   }
4702 
4703   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
4704   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
4705   PetscCall(PetscFree(status));
4706 
4707   PetscCall(PetscFree(s_waits));
4708   PetscCall(PetscFree(r_waits));
4709 
4710   /* insert mat values of mpimat */
4711   PetscCall(PetscMalloc1(N, &ba_i));
4712   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4713 
4714   for (k = 0; k < merge->nrecv; k++) {
4715     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4716     nrows       = *buf_ri_k[k];
4717     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4718     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4719   }
4720 
4721   /* set values of ba */
4722   m = merge->rowmap->n;
4723   for (i = 0; i < m; i++) {
4724     arow = owners[rank] + i;
4725     bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
4726     bnzi = bi[i + 1] - bi[i];
4727     PetscCall(PetscArrayzero(ba_i, bnzi));
4728 
4729     /* add local non-zero vals of this proc's seqmat into ba */
4730     anzi   = ai[arow + 1] - ai[arow];
4731     aj     = a->j + ai[arow];
4732     aa     = a_a + ai[arow];
4733     nextaj = 0;
4734     for (j = 0; nextaj < anzi; j++) {
4735       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4736         ba_i[j] += aa[nextaj++];
4737       }
4738     }
4739 
4740     /* add received vals into ba */
4741     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4742       /* i-th row */
4743       if (i == *nextrow[k]) {
4744         anzi   = *(nextai[k] + 1) - *nextai[k];
4745         aj     = buf_rj[k] + *nextai[k];
4746         aa     = abuf_r[k] + *nextai[k];
4747         nextaj = 0;
4748         for (j = 0; nextaj < anzi; j++) {
4749           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4750             ba_i[j] += aa[nextaj++];
4751           }
4752         }
4753         nextrow[k]++;
4754         nextai[k]++;
4755       }
4756     }
4757     PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
4758   }
4759   PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
4760   PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
4761   PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));
4762 
4763   PetscCall(PetscFree(abuf_r[0]));
4764   PetscCall(PetscFree(abuf_r));
4765   PetscCall(PetscFree(ba_i));
4766   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
4767   PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
4768   PetscFunctionReturn(PETSC_SUCCESS);
4769 }
4770 
4771 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4772 {
4773   Mat                  B_mpi;
4774   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4775   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4776   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4777   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4778   PetscInt             len, *dnz, *onz, bs, cbs;
4779   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4780   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4781   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4782   MPI_Status          *status;
4783   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4784   PetscBT              lnkbt;
4785   Mat_Merge_SeqsToMPI *merge;
4786   PetscContainer       container;
4787 
4788   PetscFunctionBegin;
4789   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4790 
4791   /* make sure it is a PETSc comm */
4792   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4793   PetscCallMPI(MPI_Comm_size(comm, &size));
4794   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4795 
4796   PetscCall(PetscNew(&merge));
4797   PetscCall(PetscMalloc1(size, &status));
4798 
4799   /* determine row ownership */
4800   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4801   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4802   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4803   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4804   PetscCall(PetscLayoutSetUp(merge->rowmap));
4805   PetscCall(PetscMalloc1(size, &len_si));
4806   PetscCall(PetscMalloc1(size, &merge->len_s));
4807 
4808   m      = merge->rowmap->n;
4809   owners = merge->rowmap->range;
4810 
4811   /* determine the number of messages to send, their lengths */
4812   len_s = merge->len_s;
4813 
4814   len          = 0; /* length of buf_si[] */
4815   merge->nsend = 0;
4816   for (PetscMPIInt proc = 0; proc < size; proc++) {
4817     len_si[proc] = 0;
4818     if (proc == rank) {
4819       len_s[proc] = 0;
4820     } else {
4821       PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc]));
4822       PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */
4823     }
4824     if (len_s[proc]) {
4825       merge->nsend++;
4826       nrows = 0;
4827       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4828         if (ai[i + 1] > ai[i]) nrows++;
4829       }
4830       PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc]));
4831       len += len_si[proc];
4832     }
4833   }
4834 
4835   /* determine the number and length of messages to receive for ij-structure */
4836   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4837   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4838 
4839   /* post the Irecv of j-structure */
4840   PetscCall(PetscCommGetNewTag(comm, &tagj));
4841   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4842 
4843   /* post the Isend of j-structure */
4844   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4845 
4846   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
4847     if (!len_s[proc]) continue;
4848     i = owners[proc];
4849     PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4850     k++;
4851   }
4852 
4853   /* receives and sends of j-structure are complete */
4854   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4855   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4856 
4857   /* send and recv i-structure */
4858   PetscCall(PetscCommGetNewTag(comm, &tagi));
4859   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4860 
4861   PetscCall(PetscMalloc1(len + 1, &buf_s));
4862   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4863   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
4864     if (!len_s[proc]) continue;
4865     /* form outgoing message for i-structure:
4866          buf_si[0]:                 nrows to be sent
4867                [1:nrows]:           row index (global)
4868                [nrows+1:2*nrows+1]: i-structure index
4869     */
4870     nrows       = len_si[proc] / 2 - 1;
4871     buf_si_i    = buf_si + nrows + 1;
4872     buf_si[0]   = nrows;
4873     buf_si_i[0] = 0;
4874     nrows       = 0;
4875     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4876       anzi = ai[i + 1] - ai[i];
4877       if (anzi) {
4878         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4879         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4880         nrows++;
4881       }
4882     }
4883     PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4884     k++;
4885     buf_si += len_si[proc];
4886   }
4887 
4888   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4889   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4890 
4891   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4892   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4893 
4894   PetscCall(PetscFree(len_si));
4895   PetscCall(PetscFree(len_ri));
4896   PetscCall(PetscFree(rj_waits));
4897   PetscCall(PetscFree2(si_waits, sj_waits));
4898   PetscCall(PetscFree(ri_waits));
4899   PetscCall(PetscFree(buf_s));
4900   PetscCall(PetscFree(status));
4901 
4902   /* compute a local seq matrix in each processor */
4903   /* allocate bi array and free space for accumulating nonzero column info */
4904   PetscCall(PetscMalloc1(m + 1, &bi));
4905   bi[0] = 0;
4906 
4907   /* create and initialize a linked list */
4908   nlnk = N + 1;
4909   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
4910 
4911   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4912   len = ai[owners[rank + 1]] - ai[owners[rank]];
4913   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
4914 
4915   current_space = free_space;
4916 
4917   /* determine symbolic info for each local row */
4918   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4919 
4920   for (k = 0; k < merge->nrecv; k++) {
4921     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4922     nrows       = *buf_ri_k[k];
4923     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4924     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4925   }
4926 
4927   MatPreallocateBegin(comm, m, n, dnz, onz);
4928   len = 0;
4929   for (i = 0; i < m; i++) {
4930     bnzi = 0;
4931     /* add local non-zero cols of this proc's seqmat into lnk */
4932     arow = owners[rank] + i;
4933     anzi = ai[arow + 1] - ai[arow];
4934     aj   = a->j + ai[arow];
4935     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
4936     bnzi += nlnk;
4937     /* add received col data into lnk */
4938     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4939       if (i == *nextrow[k]) {            /* i-th row */
4940         anzi = *(nextai[k] + 1) - *nextai[k];
4941         aj   = buf_rj[k] + *nextai[k];
4942         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
4943         bnzi += nlnk;
4944         nextrow[k]++;
4945         nextai[k]++;
4946       }
4947     }
4948     if (len < bnzi) len = bnzi; /* =max(bnzi) */
4949 
4950     /* if free space is not available, make more free space */
4951     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
4952     /* copy data into free space, then initialize lnk */
4953     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
4954     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
4955 
4956     current_space->array += bnzi;
4957     current_space->local_used += bnzi;
4958     current_space->local_remaining -= bnzi;
4959 
4960     bi[i + 1] = bi[i] + bnzi;
4961   }
4962 
4963   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
4964 
4965   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
4966   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
4967   PetscCall(PetscLLDestroy(lnk, lnkbt));
4968 
4969   /* create symbolic parallel matrix B_mpi */
4970   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
4971   PetscCall(MatCreate(comm, &B_mpi));
4972   if (n == PETSC_DECIDE) {
4973     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
4974   } else {
4975     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
4976   }
4977   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
4978   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
4979   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
4980   MatPreallocateEnd(dnz, onz);
4981   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
4982 
4983   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4984   B_mpi->assembled = PETSC_FALSE;
4985   merge->bi        = bi;
4986   merge->bj        = bj;
4987   merge->buf_ri    = buf_ri;
4988   merge->buf_rj    = buf_rj;
4989   merge->coi       = NULL;
4990   merge->coj       = NULL;
4991   merge->owners_co = NULL;
4992 
4993   PetscCall(PetscCommDestroy(&comm));
4994 
4995   /* attach the supporting struct to B_mpi for reuse */
4996   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
4997   PetscCall(PetscContainerSetPointer(container, merge));
4998   PetscCall(PetscContainerSetCtxDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
4999   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5000   PetscCall(PetscContainerDestroy(&container));
5001   *mpimat = B_mpi;
5002 
5003   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5004   PetscFunctionReturn(PETSC_SUCCESS);
5005 }
5006 
5007 /*@
5008   MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5009   matrices from each processor
5010 
5011   Collective
5012 
5013   Input Parameters:
5014 + comm   - the communicators the parallel matrix will live on
5015 . seqmat - the input sequential matrices
5016 . m      - number of local rows (or `PETSC_DECIDE`)
5017 . n      - number of local columns (or `PETSC_DECIDE`)
5018 - scall  - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5019 
5020   Output Parameter:
5021 . mpimat - the parallel matrix generated
5022 
5023   Level: advanced
5024 
5025   Note:
5026   The dimensions of the sequential matrix in each processor MUST be the same.
5027   The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5028   destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`.
5029 
5030 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
5031 @*/
5032 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5033 {
5034   PetscMPIInt size;
5035 
5036   PetscFunctionBegin;
5037   PetscCallMPI(MPI_Comm_size(comm, &size));
5038   if (size == 1) {
5039     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5040     if (scall == MAT_INITIAL_MATRIX) {
5041       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5042     } else {
5043       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5044     }
5045     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5046     PetscFunctionReturn(PETSC_SUCCESS);
5047   }
5048   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5049   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5050   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5051   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5052   PetscFunctionReturn(PETSC_SUCCESS);
5053 }
5054 
5055 /*@
5056   MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix.
5057 
5058   Not Collective
5059 
5060   Input Parameter:
5061 . A - the matrix
5062 
5063   Output Parameter:
5064 . A_loc - the local sequential matrix generated
5065 
5066   Level: developer
5067 
5068   Notes:
5069   The matrix is created by taking `A`'s local rows and putting them into a sequential matrix
5070   with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and
5071   `n` is the global column count obtained with `MatGetSize()`
5072 
5073   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5074 
5075   For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.
5076 
5077   Destroy the matrix with `MatDestroy()`
5078 
5079 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5080 @*/
5081 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5082 {
5083   PetscBool mpi;
5084 
5085   PetscFunctionBegin;
5086   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5087   if (mpi) {
5088     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5089   } else {
5090     *A_loc = A;
5091     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5092   }
5093   PetscFunctionReturn(PETSC_SUCCESS);
5094 }
5095 
5096 /*@
5097   MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.
5098 
5099   Not Collective
5100 
5101   Input Parameters:
5102 + A     - the matrix
5103 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5104 
5105   Output Parameter:
5106 . A_loc - the local sequential matrix generated
5107 
5108   Level: developer
5109 
5110   Notes:
5111   The matrix is created by taking all `A`'s local rows and putting them into a sequential
5112   matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with
5113   `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.
5114 
5115   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5116 
5117   When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
5118   with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
5119   then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
5120   and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.
5121 
5122 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5123 @*/
5124 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
5125 {
5126   Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
5127   Mat_SeqAIJ        *mat, *a, *b;
5128   PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
5129   const PetscScalar *aa, *ba, *aav, *bav;
5130   PetscScalar       *ca, *cam;
5131   PetscMPIInt        size;
5132   PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
5133   PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
5134   PetscBool          match;
5135 
5136   PetscFunctionBegin;
5137   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
5138   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5139   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5140   if (size == 1) {
5141     if (scall == MAT_INITIAL_MATRIX) {
5142       PetscCall(PetscObjectReference((PetscObject)mpimat->A));
5143       *A_loc = mpimat->A;
5144     } else if (scall == MAT_REUSE_MATRIX) {
5145       PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
5146     }
5147     PetscFunctionReturn(PETSC_SUCCESS);
5148   }
5149 
5150   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5151   a  = (Mat_SeqAIJ *)mpimat->A->data;
5152   b  = (Mat_SeqAIJ *)mpimat->B->data;
5153   ai = a->i;
5154   aj = a->j;
5155   bi = b->i;
5156   bj = b->j;
5157   PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
5158   PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
5159   aa = aav;
5160   ba = bav;
5161   if (scall == MAT_INITIAL_MATRIX) {
5162     PetscCall(PetscMalloc1(1 + am, &ci));
5163     ci[0] = 0;
5164     for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
5165     PetscCall(PetscMalloc1(1 + ci[am], &cj));
5166     PetscCall(PetscMalloc1(1 + ci[am], &ca));
5167     k = 0;
5168     for (i = 0; i < am; i++) {
5169       ncols_o = bi[i + 1] - bi[i];
5170       ncols_d = ai[i + 1] - ai[i];
5171       /* off-diagonal portion of A */
5172       for (jo = 0; jo < ncols_o; jo++) {
5173         col = cmap[*bj];
5174         if (col >= cstart) break;
5175         cj[k] = col;
5176         bj++;
5177         ca[k++] = *ba++;
5178       }
5179       /* diagonal portion of A */
5180       for (j = 0; j < ncols_d; j++) {
5181         cj[k]   = cstart + *aj++;
5182         ca[k++] = *aa++;
5183       }
5184       /* off-diagonal portion of A */
5185       for (j = jo; j < ncols_o; j++) {
5186         cj[k]   = cmap[*bj++];
5187         ca[k++] = *ba++;
5188       }
5189     }
5190     /* put together the new matrix */
5191     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
5192     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5193     /* Since these are PETSc arrays, change flags to free them as necessary. */
5194     mat          = (Mat_SeqAIJ *)(*A_loc)->data;
5195     mat->free_a  = PETSC_TRUE;
5196     mat->free_ij = PETSC_TRUE;
5197     mat->nonew   = 0;
5198   } else if (scall == MAT_REUSE_MATRIX) {
5199     mat = (Mat_SeqAIJ *)(*A_loc)->data;
5200     ci  = mat->i;
5201     cj  = mat->j;
5202     PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
5203     for (i = 0; i < am; i++) {
5204       /* off-diagonal portion of A */
5205       ncols_o = bi[i + 1] - bi[i];
5206       for (jo = 0; jo < ncols_o; jo++) {
5207         col = cmap[*bj];
5208         if (col >= cstart) break;
5209         *cam++ = *ba++;
5210         bj++;
5211       }
5212       /* diagonal portion of A */
5213       ncols_d = ai[i + 1] - ai[i];
5214       for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
5215       /* off-diagonal portion of A */
5216       for (j = jo; j < ncols_o; j++) {
5217         *cam++ = *ba++;
5218         bj++;
5219       }
5220     }
5221     PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
5222   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5223   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
5224   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
5225   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5226   PetscFunctionReturn(PETSC_SUCCESS);
5227 }
5228 
5229 /*@
5230   MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5231   mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part
5232 
5233   Not Collective
5234 
5235   Input Parameters:
5236 + A     - the matrix
5237 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5238 
5239   Output Parameters:
5240 + glob  - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5241 - A_loc - the local sequential matrix generated
5242 
5243   Level: developer
5244 
5245   Note:
5246   This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal
5247   part, then those associated with the off-diagonal part (in its local ordering)
5248 
5249 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5250 @*/
5251 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5252 {
5253   Mat             Ao, Ad;
5254   const PetscInt *cmap;
5255   PetscMPIInt     size;
5256   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5257 
5258   PetscFunctionBegin;
5259   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5260   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5261   if (size == 1) {
5262     if (scall == MAT_INITIAL_MATRIX) {
5263       PetscCall(PetscObjectReference((PetscObject)Ad));
5264       *A_loc = Ad;
5265     } else if (scall == MAT_REUSE_MATRIX) {
5266       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5267     }
5268     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5269     PetscFunctionReturn(PETSC_SUCCESS);
5270   }
5271   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5272   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5273   if (f) {
5274     PetscCall((*f)(A, scall, glob, A_loc));
5275   } else {
5276     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5277     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5278     Mat_SeqAIJ        *c;
5279     PetscInt          *ai = a->i, *aj = a->j;
5280     PetscInt          *bi = b->i, *bj = b->j;
5281     PetscInt          *ci, *cj;
5282     const PetscScalar *aa, *ba;
5283     PetscScalar       *ca;
5284     PetscInt           i, j, am, dn, on;
5285 
5286     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5287     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5288     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5289     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5290     if (scall == MAT_INITIAL_MATRIX) {
5291       PetscInt k;
5292       PetscCall(PetscMalloc1(1 + am, &ci));
5293       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5294       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5295       ci[0] = 0;
5296       for (i = 0, k = 0; i < am; i++) {
5297         const PetscInt ncols_o = bi[i + 1] - bi[i];
5298         const PetscInt ncols_d = ai[i + 1] - ai[i];
5299         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5300         /* diagonal portion of A */
5301         for (j = 0; j < ncols_d; j++, k++) {
5302           cj[k] = *aj++;
5303           ca[k] = *aa++;
5304         }
5305         /* off-diagonal portion of A */
5306         for (j = 0; j < ncols_o; j++, k++) {
5307           cj[k] = dn + *bj++;
5308           ca[k] = *ba++;
5309         }
5310       }
5311       /* put together the new matrix */
5312       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5313       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5314       /* Since these are PETSc arrays, change flags to free them as necessary. */
5315       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5316       c->free_a  = PETSC_TRUE;
5317       c->free_ij = PETSC_TRUE;
5318       c->nonew   = 0;
5319       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5320     } else if (scall == MAT_REUSE_MATRIX) {
5321       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5322       for (i = 0; i < am; i++) {
5323         const PetscInt ncols_d = ai[i + 1] - ai[i];
5324         const PetscInt ncols_o = bi[i + 1] - bi[i];
5325         /* diagonal portion of A */
5326         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5327         /* off-diagonal portion of A */
5328         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5329       }
5330       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5331     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5332     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5333     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa));
5334     if (glob) {
5335       PetscInt cst, *gidx;
5336 
5337       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5338       PetscCall(PetscMalloc1(dn + on, &gidx));
5339       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5340       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5341       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5342     }
5343   }
5344   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5345   PetscFunctionReturn(PETSC_SUCCESS);
5346 }
5347 
5348 /*@C
5349   MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5350 
5351   Not Collective
5352 
5353   Input Parameters:
5354 + A     - the matrix
5355 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5356 . row   - index set of rows to extract (or `NULL`)
5357 - col   - index set of columns to extract (or `NULL`)
5358 
5359   Output Parameter:
5360 . A_loc - the local sequential matrix generated
5361 
5362   Level: developer
5363 
5364 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5365 @*/
5366 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
5367 {
5368   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5369   PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
5370   IS          isrowa, iscola;
5371   Mat        *aloc;
5372   PetscBool   match;
5373 
5374   PetscFunctionBegin;
5375   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
5376   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5377   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5378   if (!row) {
5379     start = A->rmap->rstart;
5380     end   = A->rmap->rend;
5381     PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
5382   } else {
5383     isrowa = *row;
5384   }
5385   if (!col) {
5386     start = A->cmap->rstart;
5387     cmap  = a->garray;
5388     nzA   = a->A->cmap->n;
5389     nzB   = a->B->cmap->n;
5390     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5391     ncols = 0;
5392     for (i = 0; i < nzB; i++) {
5393       if (cmap[i] < start) idx[ncols++] = cmap[i];
5394       else break;
5395     }
5396     imark = i;
5397     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
5398     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
5399     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
5400   } else {
5401     iscola = *col;
5402   }
5403   if (scall != MAT_INITIAL_MATRIX) {
5404     PetscCall(PetscMalloc1(1, &aloc));
5405     aloc[0] = *A_loc;
5406   }
5407   PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
5408   if (!col) { /* attach global id of condensed columns */
5409     PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
5410   }
5411   *A_loc = aloc[0];
5412   PetscCall(PetscFree(aloc));
5413   if (!row) PetscCall(ISDestroy(&isrowa));
5414   if (!col) PetscCall(ISDestroy(&iscola));
5415   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5416   PetscFunctionReturn(PETSC_SUCCESS);
5417 }
5418 
5419 /*
5420  * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched.
5421  * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based
5422  * on a global size.
5423  * */
5424 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
5425 {
5426   Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
5427   Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth;
5428   PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
5429   PetscMPIInt            owner;
5430   PetscSFNode           *iremote, *oiremote;
5431   const PetscInt        *lrowindices;
5432   PetscSF                sf, osf;
5433   PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
5434   PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
5435   MPI_Comm               comm;
5436   ISLocalToGlobalMapping mapping;
5437   const PetscScalar     *pd_a, *po_a;
5438 
5439   PetscFunctionBegin;
5440   PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
5441   /* plocalsize is the number of roots
5442    * nrows is the number of leaves
5443    * */
5444   PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
5445   PetscCall(ISGetLocalSize(rows, &nrows));
5446   PetscCall(PetscCalloc1(nrows, &iremote));
5447   PetscCall(ISGetIndices(rows, &lrowindices));
5448   for (i = 0; i < nrows; i++) {
5449     /* Find a remote index and an owner for a row
5450      * The row could be local or remote
5451      * */
5452     owner = 0;
5453     lidx  = 0;
5454     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
5455     iremote[i].index = lidx;
5456     iremote[i].rank  = owner;
5457   }
5458   /* Create SF to communicate how many nonzero columns for each row */
5459   PetscCall(PetscSFCreate(comm, &sf));
5460   /* SF will figure out the number of nonzero columns for each row, and their
5461    * offsets
5462    * */
5463   PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5464   PetscCall(PetscSFSetFromOptions(sf));
5465   PetscCall(PetscSFSetUp(sf));
5466 
5467   PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
5468   PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
5469   PetscCall(PetscCalloc1(nrows, &pnnz));
5470   roffsets[0] = 0;
5471   roffsets[1] = 0;
5472   for (i = 0; i < plocalsize; i++) {
5473     /* diagonal */
5474     nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
5475     /* off-diagonal */
5476     nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
5477     /* compute offsets so that we relative location for each row */
5478     roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
5479     roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
5480   }
5481   PetscCall(PetscCalloc1(2 * nrows, &nlcols));
5482   PetscCall(PetscCalloc1(2 * nrows, &loffsets));
5483   /* 'r' means root, and 'l' means leaf */
5484   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5485   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5486   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5487   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5488   PetscCall(PetscSFDestroy(&sf));
5489   PetscCall(PetscFree(roffsets));
5490   PetscCall(PetscFree(nrcols));
5491   dntotalcols = 0;
5492   ontotalcols = 0;
5493   ncol        = 0;
5494   for (i = 0; i < nrows; i++) {
5495     pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
5496     ncol    = PetscMax(pnnz[i], ncol);
5497     /* diagonal */
5498     dntotalcols += nlcols[i * 2 + 0];
5499     /* off-diagonal */
5500     ontotalcols += nlcols[i * 2 + 1];
5501   }
5502   /* We do not need to figure the right number of columns
5503    * since all the calculations will be done by going through the raw data
5504    * */
5505   PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
5506   PetscCall(MatSetUp(*P_oth));
5507   PetscCall(PetscFree(pnnz));
5508   p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5509   /* diagonal */
5510   PetscCall(PetscCalloc1(dntotalcols, &iremote));
5511   /* off-diagonal */
5512   PetscCall(PetscCalloc1(ontotalcols, &oiremote));
5513   /* diagonal */
5514   PetscCall(PetscCalloc1(dntotalcols, &ilocal));
5515   /* off-diagonal */
5516   PetscCall(PetscCalloc1(ontotalcols, &oilocal));
5517   dntotalcols = 0;
5518   ontotalcols = 0;
5519   ntotalcols  = 0;
5520   for (i = 0; i < nrows; i++) {
5521     owner = 0;
5522     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
5523     /* Set iremote for diag matrix */
5524     for (j = 0; j < nlcols[i * 2 + 0]; j++) {
5525       iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
5526       iremote[dntotalcols].rank  = owner;
5527       /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
5528       ilocal[dntotalcols++] = ntotalcols++;
5529     }
5530     /* off-diagonal */
5531     for (j = 0; j < nlcols[i * 2 + 1]; j++) {
5532       oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
5533       oiremote[ontotalcols].rank  = owner;
5534       oilocal[ontotalcols++]      = ntotalcols++;
5535     }
5536   }
5537   PetscCall(ISRestoreIndices(rows, &lrowindices));
5538   PetscCall(PetscFree(loffsets));
5539   PetscCall(PetscFree(nlcols));
5540   PetscCall(PetscSFCreate(comm, &sf));
5541   /* P serves as roots and P_oth is leaves
5542    * Diag matrix
5543    * */
5544   PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5545   PetscCall(PetscSFSetFromOptions(sf));
5546   PetscCall(PetscSFSetUp(sf));
5547 
5548   PetscCall(PetscSFCreate(comm, &osf));
5549   /* off-diagonal */
5550   PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
5551   PetscCall(PetscSFSetFromOptions(osf));
5552   PetscCall(PetscSFSetUp(osf));
5553   PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5554   PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5555   /* operate on the matrix internal data to save memory */
5556   PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5557   PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5558   PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
5559   /* Convert to global indices for diag matrix */
5560   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
5561   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5562   /* We want P_oth store global indices */
5563   PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
5564   /* Use memory scalable approach */
5565   PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
5566   PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
5567   PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5568   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5569   /* Convert back to local indices */
5570   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
5571   PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5572   nout = 0;
5573   PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
5574   PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
5575   PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
5576   /* Exchange values */
5577   PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5578   PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5579   PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5580   PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5581   /* Stop PETSc from shrinking memory */
5582   for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
5583   PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
5584   PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
5585   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5586   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
5587   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
5588   PetscCall(PetscSFDestroy(&sf));
5589   PetscCall(PetscSFDestroy(&osf));
5590   PetscFunctionReturn(PETSC_SUCCESS);
5591 }
5592 
5593 /*
5594  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5595  * This supports MPIAIJ and MAIJ
5596  * */
5597 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
5598 {
5599   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
5600   Mat_SeqAIJ *p_oth;
5601   IS          rows, map;
5602   PetscHMapI  hamp;
5603   PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
5604   MPI_Comm    comm;
5605   PetscSF     sf, osf;
5606   PetscBool   has;
5607 
5608   PetscFunctionBegin;
5609   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5610   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
5611   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5612    *  and then create a submatrix (that often is an overlapping matrix)
5613    * */
5614   if (reuse == MAT_INITIAL_MATRIX) {
5615     /* Use a hash table to figure out unique keys */
5616     PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
5617     PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
5618     count = 0;
5619     /* Assume that  a->g is sorted, otherwise the following does not make sense */
5620     for (i = 0; i < a->B->cmap->n; i++) {
5621       key = a->garray[i] / dof;
5622       PetscCall(PetscHMapIHas(hamp, key, &has));
5623       if (!has) {
5624         mapping[i] = count;
5625         PetscCall(PetscHMapISet(hamp, key, count++));
5626       } else {
5627         /* Current 'i' has the same value the previous step */
5628         mapping[i] = count - 1;
5629       }
5630     }
5631     PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
5632     PetscCall(PetscHMapIGetSize(hamp, &htsize));
5633     PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
5634     PetscCall(PetscCalloc1(htsize, &rowindices));
5635     off = 0;
5636     PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
5637     PetscCall(PetscHMapIDestroy(&hamp));
5638     PetscCall(PetscSortInt(htsize, rowindices));
5639     PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
5640     /* In case, the matrix was already created but users want to recreate the matrix */
5641     PetscCall(MatDestroy(P_oth));
5642     PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
5643     PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
5644     PetscCall(ISDestroy(&map));
5645     PetscCall(ISDestroy(&rows));
5646   } else if (reuse == MAT_REUSE_MATRIX) {
5647     /* If the matrix was already created, we simply update its values using the SF objects
5648      * that were attached to the matrix earlier.
5649      */
5650     const PetscScalar *pd_a, *po_a;
5651 
5652     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
5653     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
5654     PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
5655     p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5656     /* Update values in place */
5657     PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5658     PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5659     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5660     PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5661     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5662     PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5663     PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5664     PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5665   } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
5666   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
5667   PetscFunctionReturn(PETSC_SUCCESS);
5668 }
5669 
5670 /*@C
5671   MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A`
5672 
5673   Collective
5674 
5675   Input Parameters:
5676 + A     - the first matrix in `MATMPIAIJ` format
5677 . B     - the second matrix in `MATMPIAIJ` format
5678 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5679 
5680   Output Parameters:
5681 + rowb  - On input index sets of rows of B to extract (or `NULL`), modified on output
5682 . colb  - On input index sets of columns of B to extract (or `NULL`), modified on output
5683 - B_seq - the sequential matrix generated
5684 
5685   Level: developer
5686 
5687 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
5688 @*/
5689 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5690 {
5691   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5692   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5693   IS          isrowb, iscolb;
5694   Mat        *bseq = NULL;
5695 
5696   PetscFunctionBegin;
5697   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5698              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5699   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5700 
5701   if (scall == MAT_INITIAL_MATRIX) {
5702     start = A->cmap->rstart;
5703     cmap  = a->garray;
5704     nzA   = a->A->cmap->n;
5705     nzB   = a->B->cmap->n;
5706     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5707     ncols = 0;
5708     for (i = 0; i < nzB; i++) { /* row < local row index */
5709       if (cmap[i] < start) idx[ncols++] = cmap[i];
5710       else break;
5711     }
5712     imark = i;
5713     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5714     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5715     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5716     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5717   } else {
5718     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5719     isrowb = *rowb;
5720     iscolb = *colb;
5721     PetscCall(PetscMalloc1(1, &bseq));
5722     bseq[0] = *B_seq;
5723   }
5724   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5725   *B_seq = bseq[0];
5726   PetscCall(PetscFree(bseq));
5727   if (!rowb) {
5728     PetscCall(ISDestroy(&isrowb));
5729   } else {
5730     *rowb = isrowb;
5731   }
5732   if (!colb) {
5733     PetscCall(ISDestroy(&iscolb));
5734   } else {
5735     *colb = iscolb;
5736   }
5737   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5738   PetscFunctionReturn(PETSC_SUCCESS);
5739 }
5740 
5741 /*
5742     MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns
5743     of the OFF-DIAGONAL portion of local A
5744 
5745     Collective
5746 
5747    Input Parameters:
5748 +    A,B - the matrices in `MATMPIAIJ` format
5749 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5750 
5751    Output Parameter:
5752 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5753 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5754 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5755 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5756 
5757     Developer Note:
5758     This directly accesses information inside the VecScatter associated with the matrix-vector product
5759      for this matrix. This is not desirable..
5760 
5761     Level: developer
5762 
5763 */
5764 
5765 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5766 {
5767   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5768   VecScatter         ctx;
5769   MPI_Comm           comm;
5770   const PetscMPIInt *rprocs, *sprocs;
5771   PetscMPIInt        nrecvs, nsends;
5772   const PetscInt    *srow, *rstarts, *sstarts;
5773   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5774   PetscInt           i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len;
5775   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5776   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5777   PetscMPIInt        size, tag, rank, nreqs;
5778 
5779   PetscFunctionBegin;
5780   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5781   PetscCallMPI(MPI_Comm_size(comm, &size));
5782 
5783   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5784              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5785   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5786   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5787 
5788   if (size == 1) {
5789     startsj_s = NULL;
5790     bufa_ptr  = NULL;
5791     *B_oth    = NULL;
5792     PetscFunctionReturn(PETSC_SUCCESS);
5793   }
5794 
5795   ctx = a->Mvctx;
5796   tag = ((PetscObject)ctx)->tag;
5797 
5798   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5799   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5800   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5801   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5802   PetscCall(PetscMalloc1(nreqs, &reqs));
5803   rwaits = reqs;
5804   swaits = PetscSafePointerPlusOffset(reqs, nrecvs);
5805 
5806   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5807   if (scall == MAT_INITIAL_MATRIX) {
5808     /* i-array */
5809     /*  post receives */
5810     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5811     for (i = 0; i < nrecvs; i++) {
5812       rowlen = rvalues + rstarts[i] * rbs;
5813       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5814       PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5815     }
5816 
5817     /* pack the outgoing message */
5818     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5819 
5820     sstartsj[0] = 0;
5821     rstartsj[0] = 0;
5822     len         = 0; /* total length of j or a array to be sent */
5823     if (nsends) {
5824       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5825       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5826     }
5827     for (i = 0; i < nsends; i++) {
5828       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5829       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5830       for (j = 0; j < nrows; j++) {
5831         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5832         for (l = 0; l < sbs; l++) {
5833           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5834 
5835           rowlen[j * sbs + l] = ncols;
5836 
5837           len += ncols;
5838           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5839         }
5840         k++;
5841       }
5842       PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5843 
5844       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5845     }
5846     /* recvs and sends of i-array are completed */
5847     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5848     PetscCall(PetscFree(svalues));
5849 
5850     /* allocate buffers for sending j and a arrays */
5851     PetscCall(PetscMalloc1(len + 1, &bufj));
5852     PetscCall(PetscMalloc1(len + 1, &bufa));
5853 
5854     /* create i-array of B_oth */
5855     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5856 
5857     b_othi[0] = 0;
5858     len       = 0; /* total length of j or a array to be received */
5859     k         = 0;
5860     for (i = 0; i < nrecvs; i++) {
5861       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5862       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5863       for (j = 0; j < nrows; j++) {
5864         b_othi[k + 1] = b_othi[k] + rowlen[j];
5865         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5866         k++;
5867       }
5868       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5869     }
5870     PetscCall(PetscFree(rvalues));
5871 
5872     /* allocate space for j and a arrays of B_oth */
5873     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5874     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5875 
5876     /* j-array */
5877     /*  post receives of j-array */
5878     for (i = 0; i < nrecvs; i++) {
5879       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5880       PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5881     }
5882 
5883     /* pack the outgoing message j-array */
5884     if (nsends) k = sstarts[0];
5885     for (i = 0; i < nsends; i++) {
5886       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5887       bufJ  = bufj + sstartsj[i];
5888       for (j = 0; j < nrows; j++) {
5889         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5890         for (ll = 0; ll < sbs; ll++) {
5891           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5892           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5893           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5894         }
5895       }
5896       PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5897     }
5898 
5899     /* recvs and sends of j-array are completed */
5900     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5901   } else if (scall == MAT_REUSE_MATRIX) {
5902     sstartsj = *startsj_s;
5903     rstartsj = *startsj_r;
5904     bufa     = *bufa_ptr;
5905     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5906   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5907 
5908   /* a-array */
5909   /*  post receives of a-array */
5910   for (i = 0; i < nrecvs; i++) {
5911     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5912     PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
5913   }
5914 
5915   /* pack the outgoing message a-array */
5916   if (nsends) k = sstarts[0];
5917   for (i = 0; i < nsends; i++) {
5918     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5919     bufA  = bufa + sstartsj[i];
5920     for (j = 0; j < nrows; j++) {
5921       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5922       for (ll = 0; ll < sbs; ll++) {
5923         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5924         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
5925         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5926       }
5927     }
5928     PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
5929   }
5930   /* recvs and sends of a-array are completed */
5931   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5932   PetscCall(PetscFree(reqs));
5933 
5934   if (scall == MAT_INITIAL_MATRIX) {
5935     Mat_SeqAIJ *b_oth;
5936 
5937     /* put together the new matrix */
5938     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
5939 
5940     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5941     /* Since these are PETSc arrays, change flags to free them as necessary. */
5942     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5943     b_oth->free_a  = PETSC_TRUE;
5944     b_oth->free_ij = PETSC_TRUE;
5945     b_oth->nonew   = 0;
5946 
5947     PetscCall(PetscFree(bufj));
5948     if (!startsj_s || !bufa_ptr) {
5949       PetscCall(PetscFree2(sstartsj, rstartsj));
5950       PetscCall(PetscFree(bufa_ptr));
5951     } else {
5952       *startsj_s = sstartsj;
5953       *startsj_r = rstartsj;
5954       *bufa_ptr  = bufa;
5955     }
5956   } else if (scall == MAT_REUSE_MATRIX) {
5957     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
5958   }
5959 
5960   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
5961   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
5962   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
5963   PetscFunctionReturn(PETSC_SUCCESS);
5964 }
5965 
/*
   Prototypes of matrix-conversion and product-setup routines whose definitions are not in the
   visible part of this file (presumably they live in other source files of the library).
   Those tied to an optional package are declared only when the matching PETSC_HAVE_* macro is defined.
*/
5966 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
5967 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
5968 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
5969 #if defined(PETSC_HAVE_MKL_SPARSE)
5970 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
5971 #endif
5972 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
5973 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
5974 #if defined(PETSC_HAVE_ELEMENTAL)
5975 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
5976 #endif
5977 #if defined(PETSC_HAVE_SCALAPACK)
5978 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
5979 #endif
5980 #if defined(PETSC_HAVE_HYPRE)
5981 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
5982 #endif
5983 #if defined(PETSC_HAVE_CUDA)
5984 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
5985 #endif
5986 #if defined(PETSC_HAVE_HIP)
5987 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
5988 #endif
5989 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
5990 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
5991 #endif
5992 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
5993 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
5994 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5995 
5996 /*
5997     Computes (B'*A')' since computing B*A directly is untenable
5998 
5999                n                       p                          p
6000         [             ]       [             ]         [                 ]
6001       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6002         [             ]       [             ]         [                 ]
6003 
6004 */
6005 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6006 {
6007   Mat At, Bt, Ct;
6008 
6009   PetscFunctionBegin;
6010   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6011   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6012   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct));
6013   PetscCall(MatDestroy(&At));
6014   PetscCall(MatDestroy(&Bt));
6015   PetscCall(MatTransposeSetPrecursor(Ct, C));
6016   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6017   PetscCall(MatDestroy(&Ct));
6018   PetscFunctionReturn(PETSC_SUCCESS);
6019 }
6020 
6021 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6022 {
6023   PetscBool cisdense;
6024 
6025   PetscFunctionBegin;
6026   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6027   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6028   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6029   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6030   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6031   PetscCall(MatSetUp(C));
6032 
6033   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6034   PetscFunctionReturn(PETSC_SUCCESS);
6035 }
6036 
6037 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6038 {
6039   Mat_Product *product = C->product;
6040   Mat          A = product->A, B = product->B;
6041 
6042   PetscFunctionBegin;
6043   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
6044              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6045   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6046   C->ops->productsymbolic = MatProductSymbolic_AB;
6047   PetscFunctionReturn(PETSC_SUCCESS);
6048 }
6049 
6050 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6051 {
6052   Mat_Product *product = C->product;
6053 
6054   PetscFunctionBegin;
6055   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6056   PetscFunctionReturn(PETSC_SUCCESS);
6057 }
6058 
6059 /*
6060    Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6061 
6062   Input Parameters:
6063 
6064     j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
6065     j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)
6066 
6067     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6068 
6069     For Set1, j1[] contains column indices of the nonzeros.
6070     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
6071     respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted,
6072     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6073 
6074     Similar for Set2.
6075 
6076     This routine merges the two sets of nonzeros row by row and removes repeats.
6077 
6078   Output Parameters: (memory is allocated by the caller)
6079 
6080     i[],j[]: the CSR of the merged matrix, which has m rows.
6081     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6082     imap2[]: similar to imap1[], but for Set2.
6083     Note we order nonzeros row-by-row and from left to right.
6084 */
6085 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
6086 {
6087   PetscInt   r, m; /* Row index of mat */
6088   PetscCount t, t1, t2, b1, e1, b2, e2;
6089 
6090   PetscFunctionBegin;
6091   PetscCall(MatGetLocalSize(mat, &m, NULL));
6092   t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */
6093   i[0]        = 0;
6094   for (r = 0; r < m; r++) { /* Do row by row merging */
6095     b1 = rowBegin1[r];
6096     e1 = rowEnd1[r];
6097     b2 = rowBegin2[r];
6098     e2 = rowEnd2[r];
6099     while (b1 < e1 && b2 < e2) {
6100       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6101         j[t]      = j1[b1];
6102         imap1[t1] = t;
6103         imap2[t2] = t;
6104         b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
6105         b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
6106         t1++;
6107         t2++;
6108         t++;
6109       } else if (j1[b1] < j2[b2]) {
6110         j[t]      = j1[b1];
6111         imap1[t1] = t;
6112         b1 += jmap1[t1 + 1] - jmap1[t1];
6113         t1++;
6114         t++;
6115       } else {
6116         j[t]      = j2[b2];
6117         imap2[t2] = t;
6118         b2 += jmap2[t2 + 1] - jmap2[t2];
6119         t2++;
6120         t++;
6121       }
6122     }
6123     /* Merge the remaining in either j1[] or j2[] */
6124     while (b1 < e1) {
6125       j[t]      = j1[b1];
6126       imap1[t1] = t;
6127       b1 += jmap1[t1 + 1] - jmap1[t1];
6128       t1++;
6129       t++;
6130     }
6131     while (b2 < e2) {
6132       j[t]      = j2[b2];
6133       imap2[t2] = t;
6134       b2 += jmap2[t2 + 1] - jmap2[t2];
6135       t2++;
6136       t++;
6137     }
6138     PetscCall(PetscIntCast(t, i + r + 1));
6139   }
6140   PetscFunctionReturn(PETSC_SUCCESS);
6141 }
6142 
6143 /*
6144   Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6145 
6146   Input Parameters:
6147     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6148     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6149       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6150 
6151       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6152       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6153 
6154   Output Parameters:
6155     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6156     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6157       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6158       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6159 
6160     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6161       Atot: number of entries belonging to the diagonal block.
6162       Annz: number of unique nonzeros belonging to the diagonal block.
6163       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6164         repeats (i.e., same 'i,j' pair).
6165       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6166         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6167 
6168       Atot: number of entries belonging to the diagonal block
6169       Annz: number of unique nonzeros belonging to the diagonal block.
6170 
6171     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6172 
6173     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6174 */
6175 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
6176 {
6177   PetscInt    cstart, cend, rstart, rend, row, col;
6178   PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6179   PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6180   PetscCount  k, m, p, q, r, s, mid;
6181   PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;
6182 
6183   PetscFunctionBegin;
6184   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6185   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6186   m = rend - rstart;
6187 
6188   /* Skip negative rows */
6189   for (k = 0; k < n; k++)
6190     if (i[k] >= 0) break;
6191 
6192   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6193      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6194   */
6195   while (k < n) {
6196     row = i[k];
6197     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6198     for (s = k; s < n; s++)
6199       if (i[s] != row) break;
6200 
6201     /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */
6202     for (p = k; p < s; p++) {
6203       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX;
6204     }
6205     PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
6206     PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6207     rowBegin[row - rstart] = k;
6208     rowMid[row - rstart]   = mid;
6209     rowEnd[row - rstart]   = s;
6210     PetscCheck(k == s || j[s - 1] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is >= matrix column size %" PetscInt_FMT, j[s - 1], mat->cmap->N);
6211 
6212     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6213     Atot += mid - k;
6214     Btot += s - mid;
6215 
6216     /* Count unique nonzeros of this diag row */
6217     for (p = k; p < mid;) {
6218       col = j[p];
6219       do {
6220         j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */
6221         p++;
6222       } while (p < mid && j[p] == col);
6223       Annz++;
6224     }
6225 
6226     /* Count unique nonzeros of this offdiag row */
6227     for (p = mid; p < s;) {
6228       col = j[p];
6229       do {
6230         p++;
6231       } while (p < s && j[p] == col);
6232       Bnnz++;
6233     }
6234     k = s;
6235   }
6236 
6237   /* Allocation according to Atot, Btot, Annz, Bnnz */
6238   PetscCall(PetscMalloc1(Atot, &Aperm));
6239   PetscCall(PetscMalloc1(Btot, &Bperm));
6240   PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
6241   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));
6242 
6243   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6244   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6245   for (r = 0; r < m; r++) {
6246     k   = rowBegin[r];
6247     mid = rowMid[r];
6248     s   = rowEnd[r];
6249     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
6250     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
6251     Atot += mid - k;
6252     Btot += s - mid;
6253 
6254     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6255     for (p = k; p < mid;) {
6256       col = j[p];
6257       q   = p;
6258       do {
6259         p++;
6260       } while (p < mid && j[p] == col);
6261       Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
6262       Annz++;
6263     }
6264 
6265     for (p = mid; p < s;) {
6266       col = j[p];
6267       q   = p;
6268       do {
6269         p++;
6270       } while (p < s && j[p] == col);
6271       Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
6272       Bnnz++;
6273     }
6274   }
6275   /* Output */
6276   *Aperm_ = Aperm;
6277   *Annz_  = Annz;
6278   *Atot_  = Atot;
6279   *Ajmap_ = Ajmap;
6280   *Bperm_ = Bperm;
6281   *Bnnz_  = Bnnz;
6282   *Btot_  = Btot;
6283   *Bjmap_ = Bjmap;
6284   PetscFunctionReturn(PETSC_SUCCESS);
6285 }
6286 
6287 /*
6288   Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6289 
6290   Input Parameters:
6291     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6292     nnz:  number of unique nonzeros in the merged matrix
6293     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6294     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6295 
6296   Output Parameter: (memory is allocated by the caller)
6297     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6298 
6299   Example:
6300     nnz1 = 4
6301     nnz  = 6
6302     imap = [1,3,4,5]
6303     jmap = [0,3,5,6,7]
6304    then,
6305     jmap_new = [0,0,3,3,5,6,7]
6306 */
6307 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6308 {
6309   PetscCount k, p;
6310 
6311   PetscFunctionBegin;
6312   jmap_new[0] = 0;
6313   p           = nnz;                /* p loops over jmap_new[] backwards */
6314   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6315     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6316   }
6317   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6318   PetscFunctionReturn(PETSC_SUCCESS);
6319 }
6320 
6321 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void **data)
6322 {
6323   MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)*data;
6324 
6325   PetscFunctionBegin;
6326   PetscCall(PetscSFDestroy(&coo->sf));
6327   PetscCall(PetscFree(coo->Aperm1));
6328   PetscCall(PetscFree(coo->Bperm1));
6329   PetscCall(PetscFree(coo->Ajmap1));
6330   PetscCall(PetscFree(coo->Bjmap1));
6331   PetscCall(PetscFree(coo->Aimap2));
6332   PetscCall(PetscFree(coo->Bimap2));
6333   PetscCall(PetscFree(coo->Aperm2));
6334   PetscCall(PetscFree(coo->Bperm2));
6335   PetscCall(PetscFree(coo->Ajmap2));
6336   PetscCall(PetscFree(coo->Bjmap2));
6337   PetscCall(PetscFree(coo->Cperm1));
6338   PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
6339   PetscCall(PetscFree(coo));
6340   PetscFunctionReturn(PETSC_SUCCESS);
6341 }
6342 
6343 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6344 {
6345   MPI_Comm             comm;
6346   PetscMPIInt          rank, size;
6347   PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6348   PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
6349   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6350   PetscContainer       container;
6351   MatCOOStruct_MPIAIJ *coo;
6352 
6353   PetscFunctionBegin;
6354   PetscCall(PetscFree(mpiaij->garray));
6355   PetscCall(VecDestroy(&mpiaij->lvec));
6356 #if defined(PETSC_USE_CTABLE)
6357   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6358 #else
6359   PetscCall(PetscFree(mpiaij->colmap));
6360 #endif
6361   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6362   mat->assembled     = PETSC_FALSE;
6363   mat->was_assembled = PETSC_FALSE;
6364 
6365   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6366   PetscCallMPI(MPI_Comm_size(comm, &size));
6367   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6368   PetscCall(PetscLayoutSetUp(mat->rmap));
6369   PetscCall(PetscLayoutSetUp(mat->cmap));
6370   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6371   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6372   PetscCall(MatGetLocalSize(mat, &m, &n));
6373   PetscCall(MatGetSize(mat, &M, &N));
6374 
6375   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6376   /* entries come first, then local rows, then remote rows.                     */
6377   PetscCount n1 = coo_n, *perm1;
6378   PetscInt  *i1 = coo_i, *j1 = coo_j;
6379 
6380   PetscCall(PetscMalloc1(n1, &perm1));
6381   for (k = 0; k < n1; k++) perm1[k] = k;
6382 
6383   /* Manipulate indices so that entries with negative row or col indices will have smallest
6384      row indices, local entries will have greater but negative row indices, and remote entries
6385      will have positive row indices.
6386   */
6387   for (k = 0; k < n1; k++) {
6388     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN;                /* e.g., -2^31, minimal to move them ahead */
6389     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */
6390     else {
6391       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6392       if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */
6393     }
6394   }
6395 
6396   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6397   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6398 
6399   /* Advance k to the first entry we need to take care of */
6400   for (k = 0; k < n1; k++)
6401     if (i1[k] > PETSC_INT_MIN) break;
6402   PetscCount i1start = k;
6403 
6404   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */
6405   for (; k < rem; k++) i1[k] += PETSC_INT_MAX;                                    /* Revert row indices of local rows*/
6406 
6407   PetscCheck(i1 == NULL || i1[n1 - 1] < M, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "COO row index %" PetscInt_FMT " is >= the matrix row size %" PetscInt_FMT, i1[n1 - 1], M);
6408 
6409   /*           Send remote rows to their owner                                  */
6410   /* Find which rows should be sent to which remote ranks*/
6411   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6412   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6413   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6414   const PetscInt *ranges;
6415   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6416 
6417   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6418   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6419   for (k = rem; k < n1;) {
6420     PetscMPIInt owner;
6421     PetscInt    firstRow, lastRow;
6422 
6423     /* Locate a row range */
6424     firstRow = i1[k]; /* first row of this owner */
6425     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6426     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6427 
6428     /* Find the first index 'p' in [k,n) with i1[p] belonging to next owner */
6429     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6430 
6431     /* All entries in [k,p) belong to this remote owner */
6432     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6433       PetscMPIInt *sendto2;
6434       PetscInt    *nentries2;
6435       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6436 
6437       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6438       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6439       PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1));
6440       PetscCall(PetscFree2(sendto, nentries2));
6441       sendto   = sendto2;
6442       nentries = nentries2;
6443       maxNsend = maxNsend2;
6444     }
6445     sendto[nsend] = owner;
6446     PetscCall(PetscIntCast(p - k, &nentries[nsend]));
6447     nsend++;
6448     k = p;
6449   }
6450 
6451   /* Build 1st SF to know offsets on remote to send data */
6452   PetscSF      sf1;
6453   PetscInt     nroots = 1, nroots2 = 0;
6454   PetscInt     nleaves = nsend, nleaves2 = 0;
6455   PetscInt    *offsets;
6456   PetscSFNode *iremote;
6457 
6458   PetscCall(PetscSFCreate(comm, &sf1));
6459   PetscCall(PetscMalloc1(nsend, &iremote));
6460   PetscCall(PetscMalloc1(nsend, &offsets));
6461   for (k = 0; k < nsend; k++) {
6462     iremote[k].rank  = sendto[k];
6463     iremote[k].index = 0;
6464     nleaves2 += nentries[k];
6465     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6466   }
6467   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6468   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6469   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6470   PetscCall(PetscSFDestroy(&sf1));
6471   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem);
6472 
6473   /* Build 2nd SF to send remote COOs to their owner */
6474   PetscSF sf2;
6475   nroots  = nroots2;
6476   nleaves = nleaves2;
6477   PetscCall(PetscSFCreate(comm, &sf2));
6478   PetscCall(PetscSFSetFromOptions(sf2));
6479   PetscCall(PetscMalloc1(nleaves, &iremote));
6480   p = 0;
6481   for (k = 0; k < nsend; k++) {
6482     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6483     for (q = 0; q < nentries[k]; q++, p++) {
6484       iremote[p].rank = sendto[k];
6485       PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index));
6486     }
6487   }
6488   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6489 
6490   /* Send the remote COOs to their owner */
6491   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6492   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6493   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6494   PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6495   PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6496   PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem);
6497   PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem);
6498   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6499   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE));
6500   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6501   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE));
6502 
6503   PetscCall(PetscFree(offsets));
6504   PetscCall(PetscFree2(sendto, nentries));
6505 
6506   /* Sort received COOs by row along with the permutation array     */
6507   for (k = 0; k < n2; k++) perm2[k] = k;
6508   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6509 
6510   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6511   PetscCount *Cperm1;
6512   PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6513   PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem);
6514   PetscCall(PetscMalloc1(nleaves, &Cperm1));
6515   PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves));
6516 
6517   /* Support for HYPRE matrices, kind of a hack.
6518      Swap min column with diagonal so that diagonal values will go first */
6519   PetscBool hypre;
6520   PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre));
6521   if (hypre) {
6522     PetscInt *minj;
6523     PetscBT   hasdiag;
6524 
6525     PetscCall(PetscBTCreate(m, &hasdiag));
6526     PetscCall(PetscMalloc1(m, &minj));
6527     for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX;
6528     for (k = i1start; k < rem; k++) {
6529       if (j1[k] < cstart || j1[k] >= cend) continue;
6530       const PetscInt rindex = i1[k] - rstart;
6531       if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6532       minj[rindex] = PetscMin(minj[rindex], j1[k]);
6533     }
6534     for (k = 0; k < n2; k++) {
6535       if (j2[k] < cstart || j2[k] >= cend) continue;
6536       const PetscInt rindex = i2[k] - rstart;
6537       if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6538       minj[rindex] = PetscMin(minj[rindex], j2[k]);
6539     }
6540     for (k = i1start; k < rem; k++) {
6541       const PetscInt rindex = i1[k] - rstart;
6542       if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6543       if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart);
6544       else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex];
6545     }
6546     for (k = 0; k < n2; k++) {
6547       const PetscInt rindex = i2[k] - rstart;
6548       if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6549       if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart);
6550       else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex];
6551     }
6552     PetscCall(PetscBTDestroy(&hasdiag));
6553     PetscCall(PetscFree(minj));
6554   }
6555 
6556   /* Split local COOs and received COOs into diag/offdiag portions */
6557   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6558   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1;
6559   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6560   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6561   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6562   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6563 
6564   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6565   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6566   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6567   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6568 
6569   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6570   PetscInt *Ai, *Bi;
6571   PetscInt *Aj, *Bj;
6572 
6573   PetscCall(PetscMalloc1(m + 1, &Ai));
6574   PetscCall(PetscMalloc1(m + 1, &Bi));
6575   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6576   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6577 
6578   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6579   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6580   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6581   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6582   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6583 
6584   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6585   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6586 
6587   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6588   /* expect nonzeros in A/B most likely have local contributing entries        */
6589   PetscInt    Annz = Ai[m];
6590   PetscInt    Bnnz = Bi[m];
6591   PetscCount *Ajmap1_new, *Bjmap1_new;
6592 
6593   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6594   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6595 
6596   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6597   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6598 
6599   PetscCall(PetscFree(Aimap1));
6600   PetscCall(PetscFree(Ajmap1));
6601   PetscCall(PetscFree(Bimap1));
6602   PetscCall(PetscFree(Bjmap1));
6603   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6604   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6605   PetscCall(PetscFree(perm1));
6606   PetscCall(PetscFree3(i2, j2, perm2));
6607 
6608   Ajmap1 = Ajmap1_new;
6609   Bjmap1 = Bjmap1_new;
6610 
6611   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6612   if (Annz < Annz1 + Annz2) {
6613     PetscInt *Aj_new;
6614     PetscCall(PetscMalloc1(Annz, &Aj_new));
6615     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6616     PetscCall(PetscFree(Aj));
6617     Aj = Aj_new;
6618   }
6619 
6620   if (Bnnz < Bnnz1 + Bnnz2) {
6621     PetscInt *Bj_new;
6622     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6623     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6624     PetscCall(PetscFree(Bj));
6625     Bj = Bj_new;
6626   }
6627 
6628   /* Create new submatrices for on-process and off-process coupling                  */
6629   PetscScalar     *Aa, *Ba;
6630   MatType          rtype;
6631   Mat_SeqAIJ      *a, *b;
6632   PetscObjectState state;
6633   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6634   PetscCall(PetscCalloc1(Bnnz, &Ba));
6635   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6636   if (cstart) {
6637     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6638   }
6639 
6640   PetscCall(MatGetRootType_Private(mat, &rtype));
6641 
6642   MatSeqXAIJGetOptions_Private(mpiaij->A);
6643   PetscCall(MatDestroy(&mpiaij->A));
6644   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6645   PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat));
6646   MatSeqXAIJRestoreOptions_Private(mpiaij->A);
6647 
6648   MatSeqXAIJGetOptions_Private(mpiaij->B);
6649   PetscCall(MatDestroy(&mpiaij->B));
6650   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6651   PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat));
6652   MatSeqXAIJRestoreOptions_Private(mpiaij->B);
6653 
6654   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6655   mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ
6656   state              = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate;
6657   PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
6658 
6659   a          = (Mat_SeqAIJ *)mpiaij->A->data;
6660   b          = (Mat_SeqAIJ *)mpiaij->B->data;
6661   a->free_a  = PETSC_TRUE;
6662   a->free_ij = PETSC_TRUE;
6663   b->free_a  = PETSC_TRUE;
6664   b->free_ij = PETSC_TRUE;
6665   a->maxnz   = a->nz;
6666   b->maxnz   = b->nz;
6667 
6668   /* conversion must happen AFTER multiply setup */
6669   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6670   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6671   PetscCall(VecDestroy(&mpiaij->lvec));
6672   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6673 
6674   // Put the COO struct in a container and then attach that to the matrix
6675   PetscCall(PetscMalloc1(1, &coo));
6676   coo->n       = coo_n;
6677   coo->sf      = sf2;
6678   coo->sendlen = nleaves;
6679   coo->recvlen = nroots;
6680   coo->Annz    = Annz;
6681   coo->Bnnz    = Bnnz;
6682   coo->Annz2   = Annz2;
6683   coo->Bnnz2   = Bnnz2;
6684   coo->Atot1   = Atot1;
6685   coo->Atot2   = Atot2;
6686   coo->Btot1   = Btot1;
6687   coo->Btot2   = Btot2;
6688   coo->Ajmap1  = Ajmap1;
6689   coo->Aperm1  = Aperm1;
6690   coo->Bjmap1  = Bjmap1;
6691   coo->Bperm1  = Bperm1;
6692   coo->Aimap2  = Aimap2;
6693   coo->Ajmap2  = Ajmap2;
6694   coo->Aperm2  = Aperm2;
6695   coo->Bimap2  = Bimap2;
6696   coo->Bjmap2  = Bjmap2;
6697   coo->Bperm2  = Bperm2;
6698   coo->Cperm1  = Cperm1;
6699   // Allocate in preallocation. If not used, it has zero cost on host
6700   PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
6701   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
6702   PetscCall(PetscContainerSetPointer(container, coo));
6703   PetscCall(PetscContainerSetCtxDestroy(container, MatCOOStructDestroy_MPIAIJ));
6704   PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
6705   PetscCall(PetscContainerDestroy(&container));
6706   PetscFunctionReturn(PETSC_SUCCESS);
6707 }
6708 
6709 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6710 {
6711   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6712   Mat                  A = mpiaij->A, B = mpiaij->B;
6713   PetscScalar         *Aa, *Ba;
6714   PetscScalar         *sendbuf, *recvbuf;
6715   const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
6716   const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
6717   const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
6718   const PetscCount    *Cperm1;
6719   PetscContainer       container;
6720   MatCOOStruct_MPIAIJ *coo;
6721 
6722   PetscFunctionBegin;
6723   PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
6724   PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix");
6725   PetscCall(PetscContainerGetPointer(container, (void **)&coo));
6726   sendbuf = coo->sendbuf;
6727   recvbuf = coo->recvbuf;
6728   Ajmap1  = coo->Ajmap1;
6729   Ajmap2  = coo->Ajmap2;
6730   Aimap2  = coo->Aimap2;
6731   Bjmap1  = coo->Bjmap1;
6732   Bjmap2  = coo->Bjmap2;
6733   Bimap2  = coo->Bimap2;
6734   Aperm1  = coo->Aperm1;
6735   Aperm2  = coo->Aperm2;
6736   Bperm1  = coo->Bperm1;
6737   Bperm2  = coo->Bperm2;
6738   Cperm1  = coo->Cperm1;
6739 
6740   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6741   PetscCall(MatSeqAIJGetArray(B, &Ba));
6742 
6743   /* Pack entries to be sent to remote */
6744   for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6745 
6746   /* Send remote entries to their owner and overlap the communication with local computation */
6747   PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6748   /* Add local entries to A and B */
6749   for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6750     PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
6751     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6752     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6753   }
6754   for (PetscCount i = 0; i < coo->Bnnz; i++) {
6755     PetscScalar sum = 0.0;
6756     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6757     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6758   }
6759   PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6760 
6761   /* Add received remote entries to A and B */
6762   for (PetscCount i = 0; i < coo->Annz2; i++) {
6763     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6764   }
6765   for (PetscCount i = 0; i < coo->Bnnz2; i++) {
6766     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6767   }
6768   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6769   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6770   PetscFunctionReturn(PETSC_SUCCESS);
6771 }
6772 
6773 /*MC
6774    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6775 
6776    Options Database Keys:
6777 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6778 
6779    Level: beginner
6780 
6781    Notes:
6782    `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values,
6783     in this case the values associated with the rows and columns one passes in are set to zero
6784     in the matrix
6785 
6786     `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
6787     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
6788 
6789 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6790 M*/
6791 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6792 {
6793   Mat_MPIAIJ *b;
6794   PetscMPIInt size;
6795 
6796   PetscFunctionBegin;
6797   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
6798 
6799   PetscCall(PetscNew(&b));
6800   B->data       = (void *)b;
6801   B->ops[0]     = MatOps_Values;
6802   B->assembled  = PETSC_FALSE;
6803   B->insertmode = NOT_SET_VALUES;
6804   b->size       = size;
6805 
6806   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
6807 
6808   /* build cache for off array entries formed */
6809   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
6810 
6811   b->donotstash  = PETSC_FALSE;
6812   b->colmap      = NULL;
6813   b->garray      = NULL;
6814   b->roworiented = PETSC_TRUE;
6815 
6816   /* stuff used for matrix vector multiply */
6817   b->lvec  = NULL;
6818   b->Mvctx = NULL;
6819 
6820   /* stuff for MatGetRow() */
6821   b->rowindices   = NULL;
6822   b->rowvalues    = NULL;
6823   b->getrowactive = PETSC_FALSE;
6824 
6825   /* flexible pointer used in CUSPARSE classes */
6826   b->spptr = NULL;
6827 
6828   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
6829   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
6830   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
6831   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
6832   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
6833   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
6834   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetHash_C", MatResetHash_MPIAIJ));
6835   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
6836   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
6837   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
6838   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
6839 #if defined(PETSC_HAVE_CUDA)
6840   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
6841 #endif
6842 #if defined(PETSC_HAVE_HIP)
6843   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
6844 #endif
6845 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6846   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
6847 #endif
6848 #if defined(PETSC_HAVE_MKL_SPARSE)
6849   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
6850 #endif
6851   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
6852   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
6853   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
6854   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
6855 #if defined(PETSC_HAVE_ELEMENTAL)
6856   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
6857 #endif
6858 #if defined(PETSC_HAVE_SCALAPACK)
6859   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
6860 #endif
6861   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
6862   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
6863 #if defined(PETSC_HAVE_HYPRE)
6864   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
6865   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
6866 #endif
6867   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
6868   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
6869   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
6870   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
6871   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
6872   PetscFunctionReturn(PETSC_SUCCESS);
6873 }
6874 
6875 /*@
6876   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6877   and "off-diagonal" part of the matrix in CSR format.
6878 
6879   Collective
6880 
6881   Input Parameters:
6882 + comm - MPI communicator
6883 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
6884 . n    - This value should be the same as the local size used in creating the
6885          x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have
6886          calculated if `N` is given) For square matrices `n` is almost always `m`.
6887 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
6888 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
6889 . i    - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6890 . j    - column indices, which must be local, i.e., based off the start column of the diagonal portion
6891 . a    - matrix values
6892 . oi   - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6893 . oj   - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
6894 - oa   - matrix values
6895 
6896   Output Parameter:
6897 . mat - the matrix
6898 
6899   Level: advanced
6900 
6901   Notes:
6902   The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user
6903   must free the arrays once the matrix has been destroyed and not before.
6904 
6905   The `i` and `j` indices are 0 based
6906 
6907   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix
6908 
6909   This sets local rows and cannot be used to set off-processor values.
6910 
6911   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6912   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6913   not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6914   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6915   keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6916   communication if it is known that only local entries will be set.
6917 
6918 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6919           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6920 @*/
6921 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
6922 {
6923   Mat_MPIAIJ *maij;
6924 
6925   PetscFunctionBegin;
6926   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
6927   PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
6928   PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
6929   PetscCall(MatCreate(comm, mat));
6930   PetscCall(MatSetSizes(*mat, m, n, M, N));
6931   PetscCall(MatSetType(*mat, MATMPIAIJ));
6932   maij = (Mat_MPIAIJ *)(*mat)->data;
6933 
6934   (*mat)->preallocated = PETSC_TRUE;
6935 
6936   PetscCall(PetscLayoutSetUp((*mat)->rmap));
6937   PetscCall(PetscLayoutSetUp((*mat)->cmap));
6938 
6939   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
6940   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));
6941 
6942   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
6943   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
6944   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
6945   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
6946   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
6947   PetscFunctionReturn(PETSC_SUCCESS);
6948 }
6949 
6950 typedef struct {
6951   Mat       *mp;    /* intermediate products */
6952   PetscBool *mptmp; /* is the intermediate product temporary ? */
6953   PetscInt   cp;    /* number of intermediate products */
6954 
6955   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
6956   PetscInt    *startsj_s, *startsj_r;
6957   PetscScalar *bufa;
6958   Mat          P_oth;
6959 
6960   /* may take advantage of merging product->B */
6961   Mat Bloc; /* B-local by merging diag and off-diag */
6962 
6963   /* cusparse does not have support to split between symbolic and numeric phases.
6964      When api_user is true, we don't need to update the numerical values
6965      of the temporary storage */
6966   PetscBool reusesym;
6967 
6968   /* support for COO values insertion */
6969   PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
6970   PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
6971   PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
6972   PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
6973   PetscSF      sf;            /* used for non-local values insertion and memory malloc */
6974   PetscMemType mtype;
6975 
6976   /* customization */
6977   PetscBool abmerge;
6978   PetscBool P_oth_bind;
6979 } MatMatMPIAIJBACKEND;
6980 
6981 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6982 {
6983   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
6984   PetscInt             i;
6985 
6986   PetscFunctionBegin;
6987   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
6988   PetscCall(PetscFree(mmdata->bufa));
6989   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
6990   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
6991   PetscCall(MatDestroy(&mmdata->P_oth));
6992   PetscCall(MatDestroy(&mmdata->Bloc));
6993   PetscCall(PetscSFDestroy(&mmdata->sf));
6994   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
6995   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
6996   PetscCall(PetscFree(mmdata->own[0]));
6997   PetscCall(PetscFree(mmdata->own));
6998   PetscCall(PetscFree(mmdata->off[0]));
6999   PetscCall(PetscFree(mmdata->off));
7000   PetscCall(PetscFree(mmdata));
7001   PetscFunctionReturn(PETSC_SUCCESS);
7002 }
7003 
7004 /* Copy selected n entries with indices in idx[] of A to v[].
7005    If idx is NULL, copy the whole data array of A to v[]
7006  */
7007 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
7008 {
7009   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
7010 
7011   PetscFunctionBegin;
7012   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7013   if (f) {
7014     PetscCall((*f)(A, n, idx, v));
7015   } else {
7016     const PetscScalar *vv;
7017 
7018     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7019     if (n && idx) {
7020       PetscScalar    *w  = v;
7021       const PetscInt *oi = idx;
7022       PetscInt        j;
7023 
7024       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7025     } else {
7026       PetscCall(PetscArraycpy(v, vv, n));
7027     }
7028     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7029   }
7030   PetscFunctionReturn(PETSC_SUCCESS);
7031 }
7032 
7033 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
7034 {
7035   MatMatMPIAIJBACKEND *mmdata;
7036   PetscInt             i, n_d, n_o;
7037 
7038   PetscFunctionBegin;
7039   MatCheckProduct(C, 1);
7040   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
7041   mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
7042   if (!mmdata->reusesym) { /* update temporary matrices */
7043     if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7044     if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
7045   }
7046   mmdata->reusesym = PETSC_FALSE;
7047 
7048   for (i = 0; i < mmdata->cp; i++) {
7049     PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
7050     PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
7051   }
7052   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
7053     PetscInt noff;
7054 
7055     PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff));
7056     if (mmdata->mptmp[i]) continue;
7057     if (noff) {
7058       PetscInt nown;
7059 
7060       PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown));
7061       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
7062       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
7063       n_o += noff;
7064       n_d += nown;
7065     } else {
7066       Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;
7067 
7068       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
7069       n_d += mm->nz;
7070     }
7071   }
7072   if (mmdata->hasoffproc) { /* offprocess insertion */
7073     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7074     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7075   }
7076   PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
7077   PetscFunctionReturn(PETSC_SUCCESS);
7078 }
7079 
7080 /* Support for Pt * A, A * P, or Pt * A * P */
7081 #define MAX_NUMBER_INTERMEDIATE 4
7082 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
7083 {
7084   Mat_Product           *product = C->product;
7085   Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
7086   Mat_MPIAIJ            *a, *p;
7087   MatMatMPIAIJBACKEND   *mmdata;
7088   ISLocalToGlobalMapping P_oth_l2g = NULL;
7089   IS                     glob      = NULL;
7090   const char            *prefix;
7091   char                   pprefix[256];
7092   const PetscInt        *globidx, *P_oth_idx;
7093   PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
7094   PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
7095   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
7096                                                                                          /* type-0: consecutive, start from 0; type-1: consecutive with */
7097                                                                                          /* a base offset; type-2: sparse with a local to global map table */
7098   const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */
7099 
7100   MatProductType ptype;
7101   PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
7102   PetscMPIInt    size;
7103 
7104   PetscFunctionBegin;
7105   MatCheckProduct(C, 1);
7106   PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
7107   ptype = product->type;
7108   if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
7109     ptype                                          = MATPRODUCT_AB;
7110     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
7111   }
7112   switch (ptype) {
7113   case MATPRODUCT_AB:
7114     A          = product->A;
7115     P          = product->B;
7116     m          = A->rmap->n;
7117     n          = P->cmap->n;
7118     M          = A->rmap->N;
7119     N          = P->cmap->N;
7120     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
7121     break;
7122   case MATPRODUCT_AtB:
7123     P          = product->A;
7124     A          = product->B;
7125     m          = P->cmap->n;
7126     n          = A->cmap->n;
7127     M          = P->cmap->N;
7128     N          = A->cmap->N;
7129     hasoffproc = PETSC_TRUE;
7130     break;
7131   case MATPRODUCT_PtAP:
7132     A          = product->A;
7133     P          = product->B;
7134     m          = P->cmap->n;
7135     n          = P->cmap->n;
7136     M          = P->cmap->N;
7137     N          = P->cmap->N;
7138     hasoffproc = PETSC_TRUE;
7139     break;
7140   default:
7141     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7142   }
7143   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
7144   if (size == 1) hasoffproc = PETSC_FALSE;
7145 
7146   /* defaults */
7147   for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
7148     mp[i]    = NULL;
7149     mptmp[i] = PETSC_FALSE;
7150     rmapt[i] = -1;
7151     cmapt[i] = -1;
7152     rmapa[i] = NULL;
7153     cmapa[i] = NULL;
7154   }
7155 
7156   /* customization */
7157   PetscCall(PetscNew(&mmdata));
7158   mmdata->reusesym = product->api_user;
7159   if (ptype == MATPRODUCT_AB) {
7160     if (product->api_user) {
7161       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
7162       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7163       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7164       PetscOptionsEnd();
7165     } else {
7166       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
7167       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7168       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7169       PetscOptionsEnd();
7170     }
7171   } else if (ptype == MATPRODUCT_PtAP) {
7172     if (product->api_user) {
7173       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
7174       PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7175       PetscOptionsEnd();
7176     } else {
7177       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
7178       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7179       PetscOptionsEnd();
7180     }
7181   }
7182   a = (Mat_MPIAIJ *)A->data;
7183   p = (Mat_MPIAIJ *)P->data;
7184   PetscCall(MatSetSizes(C, m, n, M, N));
7185   PetscCall(PetscLayoutSetUp(C->rmap));
7186   PetscCall(PetscLayoutSetUp(C->cmap));
7187   PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
7188   PetscCall(MatGetOptionsPrefix(C, &prefix));
7189 
7190   cp = 0;
7191   switch (ptype) {
7192   case MATPRODUCT_AB: /* A * P */
7193     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7194 
7195     /* A_diag * P_local (merged or not) */
7196     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7197       /* P is product->B */
7198       PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7199       PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7200       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7201       PetscCall(MatProductSetFill(mp[cp], product->fill));
7202       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7203       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7204       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7205       mp[cp]->product->api_user = product->api_user;
7206       PetscCall(MatProductSetFromOptions(mp[cp]));
7207       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7208       PetscCall(ISGetIndices(glob, &globidx));
7209       rmapt[cp] = 1;
7210       cmapt[cp] = 2;
7211       cmapa[cp] = globidx;
7212       mptmp[cp] = PETSC_FALSE;
7213       cp++;
7214     } else { /* A_diag * P_diag and A_diag * P_off */
7215       PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
7216       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7217       PetscCall(MatProductSetFill(mp[cp], product->fill));
7218       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7219       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7220       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7221       mp[cp]->product->api_user = product->api_user;
7222       PetscCall(MatProductSetFromOptions(mp[cp]));
7223       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7224       rmapt[cp] = 1;
7225       cmapt[cp] = 1;
7226       mptmp[cp] = PETSC_FALSE;
7227       cp++;
7228       PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
7229       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7230       PetscCall(MatProductSetFill(mp[cp], product->fill));
7231       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7232       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7233       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7234       mp[cp]->product->api_user = product->api_user;
7235       PetscCall(MatProductSetFromOptions(mp[cp]));
7236       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7237       rmapt[cp] = 1;
7238       cmapt[cp] = 2;
7239       cmapa[cp] = p->garray;
7240       mptmp[cp] = PETSC_FALSE;
7241       cp++;
7242     }
7243 
7244     /* A_off * P_other */
7245     if (mmdata->P_oth) {
7246       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
7247       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7248       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7249       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7250       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7251       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7252       PetscCall(MatProductSetFill(mp[cp], product->fill));
7253       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7254       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7255       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7256       mp[cp]->product->api_user = product->api_user;
7257       PetscCall(MatProductSetFromOptions(mp[cp]));
7258       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7259       rmapt[cp] = 1;
7260       cmapt[cp] = 2;
7261       cmapa[cp] = P_oth_idx;
7262       mptmp[cp] = PETSC_FALSE;
7263       cp++;
7264     }
7265     break;
7266 
7267   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7268     /* A is product->B */
7269     PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7270     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7271       PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
7272       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7273       PetscCall(MatProductSetFill(mp[cp], product->fill));
7274       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7275       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7276       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7277       mp[cp]->product->api_user = product->api_user;
7278       PetscCall(MatProductSetFromOptions(mp[cp]));
7279       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7280       PetscCall(ISGetIndices(glob, &globidx));
7281       rmapt[cp] = 2;
7282       rmapa[cp] = globidx;
7283       cmapt[cp] = 2;
7284       cmapa[cp] = globidx;
7285       mptmp[cp] = PETSC_FALSE;
7286       cp++;
7287     } else {
7288       PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
7289       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7290       PetscCall(MatProductSetFill(mp[cp], product->fill));
7291       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7292       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7293       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7294       mp[cp]->product->api_user = product->api_user;
7295       PetscCall(MatProductSetFromOptions(mp[cp]));
7296       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7297       PetscCall(ISGetIndices(glob, &globidx));
7298       rmapt[cp] = 1;
7299       cmapt[cp] = 2;
7300       cmapa[cp] = globidx;
7301       mptmp[cp] = PETSC_FALSE;
7302       cp++;
7303       PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
7304       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7305       PetscCall(MatProductSetFill(mp[cp], product->fill));
7306       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7307       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7308       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7309       mp[cp]->product->api_user = product->api_user;
7310       PetscCall(MatProductSetFromOptions(mp[cp]));
7311       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7312       rmapt[cp] = 2;
7313       rmapa[cp] = p->garray;
7314       cmapt[cp] = 2;
7315       cmapa[cp] = globidx;
7316       mptmp[cp] = PETSC_FALSE;
7317       cp++;
7318     }
7319     break;
7320   case MATPRODUCT_PtAP:
7321     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7322     /* P is product->B */
7323     PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7324     PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7325     PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
7326     PetscCall(MatProductSetFill(mp[cp], product->fill));
7327     PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7328     PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7329     PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7330     mp[cp]->product->api_user = product->api_user;
7331     PetscCall(MatProductSetFromOptions(mp[cp]));
7332     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7333     PetscCall(ISGetIndices(glob, &globidx));
7334     rmapt[cp] = 2;
7335     rmapa[cp] = globidx;
7336     cmapt[cp] = 2;
7337     cmapa[cp] = globidx;
7338     mptmp[cp] = PETSC_FALSE;
7339     cp++;
7340     if (mmdata->P_oth) {
7341       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
7342       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7343       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7344       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7345       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7346       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7347       PetscCall(MatProductSetFill(mp[cp], product->fill));
7348       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7349       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7350       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7351       mp[cp]->product->api_user = product->api_user;
7352       PetscCall(MatProductSetFromOptions(mp[cp]));
7353       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7354       mptmp[cp] = PETSC_TRUE;
7355       cp++;
7356       PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
7357       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7358       PetscCall(MatProductSetFill(mp[cp], product->fill));
7359       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7360       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7361       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7362       mp[cp]->product->api_user = product->api_user;
7363       PetscCall(MatProductSetFromOptions(mp[cp]));
7364       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7365       rmapt[cp] = 2;
7366       rmapa[cp] = globidx;
7367       cmapt[cp] = 2;
7368       cmapa[cp] = P_oth_idx;
7369       mptmp[cp] = PETSC_FALSE;
7370       cp++;
7371     }
7372     break;
7373   default:
7374     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7375   }
7376   /* sanity check */
7377   if (size > 1)
7378     for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);
7379 
7380   PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
7381   for (i = 0; i < cp; i++) {
7382     mmdata->mp[i]    = mp[i];
7383     mmdata->mptmp[i] = mptmp[i];
7384   }
7385   mmdata->cp             = cp;
7386   C->product->data       = mmdata;
7387   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7388   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7389 
7390   /* memory type */
7391   mmdata->mtype = PETSC_MEMTYPE_HOST;
7392   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
7393   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
7394   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
7395   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7396   else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
7397   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7398 
7399   /* prepare coo coordinates for values insertion */
7400 
7401   /* count total nonzeros of those intermediate seqaij Mats
7402     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7403     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7404     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7405   */
7406   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7407     Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
7408     if (mptmp[cp]) continue;
7409     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
7410       const PetscInt *rmap = rmapa[cp];
7411       const PetscInt  mr   = mp[cp]->rmap->n;
7412       const PetscInt  rs   = C->rmap->rstart;
7413       const PetscInt  re   = C->rmap->rend;
7414       const PetscInt *ii   = mm->i;
7415       for (i = 0; i < mr; i++) {
7416         const PetscInt gr = rmap[i];
7417         const PetscInt nz = ii[i + 1] - ii[i];
7418         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7419         else ncoo_oown += nz;                  /* this row is local */
7420       }
7421     } else ncoo_d += mm->nz;
7422   }
7423 
7424   /*
7425     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7426 
7427     ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.
7428 
7429     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].
7430 
7431     off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
7432     own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
7433     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7434 
7435     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7436     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
7437   */
7438   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
7439   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));
7440 
7441   /* gather (i,j) of nonzeros inserted by remote procs */
7442   if (hasoffproc) {
7443     PetscSF  msf;
7444     PetscInt ncoo2, *coo_i2, *coo_j2;
7445 
7446     PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
7447     PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
7448     PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */
7449 
7450     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7451       Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
7452       PetscInt   *idxoff = mmdata->off[cp];
7453       PetscInt   *idxown = mmdata->own[cp];
7454       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7455         const PetscInt *rmap = rmapa[cp];
7456         const PetscInt *cmap = cmapa[cp];
7457         const PetscInt *ii   = mm->i;
7458         PetscInt       *coi  = coo_i + ncoo_o;
7459         PetscInt       *coj  = coo_j + ncoo_o;
7460         const PetscInt  mr   = mp[cp]->rmap->n;
7461         const PetscInt  rs   = C->rmap->rstart;
7462         const PetscInt  re   = C->rmap->rend;
7463         const PetscInt  cs   = C->cmap->rstart;
7464         for (i = 0; i < mr; i++) {
7465           const PetscInt *jj = mm->j + ii[i];
7466           const PetscInt  gr = rmap[i];
7467           const PetscInt  nz = ii[i + 1] - ii[i];
7468           if (gr < rs || gr >= re) { /* this is an offproc row */
7469             for (j = ii[i]; j < ii[i + 1]; j++) {
7470               *coi++    = gr;
7471               *idxoff++ = j;
7472             }
7473             if (!cmapt[cp]) { /* already global */
7474               for (j = 0; j < nz; j++) *coj++ = jj[j];
7475             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7476               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7477             } else { /* offdiag */
7478               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7479             }
7480             ncoo_o += nz;
7481           } else { /* this is a local row */
7482             for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
7483           }
7484         }
7485       }
7486       mmdata->off[cp + 1] = idxoff;
7487       mmdata->own[cp + 1] = idxown;
7488     }
7489 
7490     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7491     PetscInt incoo_o;
7492     PetscCall(PetscIntCast(ncoo_o, &incoo_o));
7493     PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
7494     PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
7495     PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
7496     ncoo = ncoo_d + ncoo_oown + ncoo2;
7497     PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
7498     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7499     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
7500     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7501     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7502     PetscCall(PetscFree2(coo_i, coo_j));
7503     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7504     PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
7505     coo_i = coo_i2;
7506     coo_j = coo_j2;
7507   } else { /* no offproc values insertion */
7508     ncoo = ncoo_d;
7509     PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));
7510 
7511     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7512     PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
7513     PetscCall(PetscSFSetUp(mmdata->sf));
7514   }
7515   mmdata->hasoffproc = hasoffproc;
7516 
7517   /* gather (i,j) of nonzeros inserted locally */
7518   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7519     Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
7520     PetscInt       *coi  = coo_i + ncoo_d;
7521     PetscInt       *coj  = coo_j + ncoo_d;
7522     const PetscInt *jj   = mm->j;
7523     const PetscInt *ii   = mm->i;
7524     const PetscInt *cmap = cmapa[cp];
7525     const PetscInt *rmap = rmapa[cp];
7526     const PetscInt  mr   = mp[cp]->rmap->n;
7527     const PetscInt  rs   = C->rmap->rstart;
7528     const PetscInt  re   = C->rmap->rend;
7529     const PetscInt  cs   = C->cmap->rstart;
7530 
7531     if (mptmp[cp]) continue;
7532     if (rmapt[cp] == 1) { /* consecutive rows */
7533       /* fill coo_i */
7534       for (i = 0; i < mr; i++) {
7535         const PetscInt gr = i + rs;
7536         for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
7537       }
7538       /* fill coo_j */
7539       if (!cmapt[cp]) { /* type-0, already global */
7540         PetscCall(PetscArraycpy(coj, jj, mm->nz));
7541       } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
7542         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7543       } else {                                            /* type-2, local to global for sparse columns */
7544         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7545       }
7546       ncoo_d += mm->nz;
7547     } else if (rmapt[cp] == 2) { /* sparse rows */
7548       for (i = 0; i < mr; i++) {
7549         const PetscInt *jj = mm->j + ii[i];
7550         const PetscInt  gr = rmap[i];
7551         const PetscInt  nz = ii[i + 1] - ii[i];
7552         if (gr >= rs && gr < re) { /* local rows */
7553           for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
7554           if (!cmapt[cp]) { /* type-0, already global */
7555             for (j = 0; j < nz; j++) *coj++ = jj[j];
7556           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7557             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7558           } else { /* type-2, local to global for sparse columns */
7559             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7560           }
7561           ncoo_d += nz;
7562         }
7563       }
7564     }
7565   }
7566   if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
7567   PetscCall(ISDestroy(&glob));
7568   if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
7569   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7570   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7571   PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));
7572 
7573   /* set block sizes */
7574   A = product->A;
7575   P = product->B;
7576   switch (ptype) {
7577   case MATPRODUCT_PtAP:
7578     PetscCall(MatSetBlockSizes(C, P->cmap->bs, P->cmap->bs));
7579     break;
7580   case MATPRODUCT_RARt:
7581     PetscCall(MatSetBlockSizes(C, P->rmap->bs, P->rmap->bs));
7582     break;
7583   case MATPRODUCT_ABC:
7584     PetscCall(MatSetBlockSizesFromMats(C, A, product->C));
7585     break;
7586   case MATPRODUCT_AB:
7587     PetscCall(MatSetBlockSizesFromMats(C, A, P));
7588     break;
7589   case MATPRODUCT_AtB:
7590     PetscCall(MatSetBlockSizes(C, A->cmap->bs, P->cmap->bs));
7591     break;
7592   case MATPRODUCT_ABt:
7593     PetscCall(MatSetBlockSizes(C, A->rmap->bs, P->rmap->bs));
7594     break;
7595   default:
7596     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for ProductType %s", MatProductTypes[ptype]);
7597   }
7598 
7599   /* preallocate with COO data */
7600   PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
7601   PetscCall(PetscFree2(coo_i, coo_j));
7602   PetscFunctionReturn(PETSC_SUCCESS);
7603 }
7604 
7605 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7606 {
7607   Mat_Product *product = mat->product;
7608 #if defined(PETSC_HAVE_DEVICE)
7609   PetscBool match  = PETSC_FALSE;
7610   PetscBool usecpu = PETSC_FALSE;
7611 #else
7612   PetscBool match = PETSC_TRUE;
7613 #endif
7614 
7615   PetscFunctionBegin;
7616   MatCheckProduct(mat, 1);
7617 #if defined(PETSC_HAVE_DEVICE)
7618   if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
7619   if (match) { /* we can always fallback to the CPU if requested */
7620     switch (product->type) {
7621     case MATPRODUCT_AB:
7622       if (product->api_user) {
7623         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
7624         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7625         PetscOptionsEnd();
7626       } else {
7627         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
7628         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7629         PetscOptionsEnd();
7630       }
7631       break;
7632     case MATPRODUCT_AtB:
7633       if (product->api_user) {
7634         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
7635         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7636         PetscOptionsEnd();
7637       } else {
7638         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
7639         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7640         PetscOptionsEnd();
7641       }
7642       break;
7643     case MATPRODUCT_PtAP:
7644       if (product->api_user) {
7645         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
7646         PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7647         PetscOptionsEnd();
7648       } else {
7649         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
7650         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7651         PetscOptionsEnd();
7652       }
7653       break;
7654     default:
7655       break;
7656     }
7657     match = (PetscBool)!usecpu;
7658   }
7659 #endif
7660   if (match) {
7661     switch (product->type) {
7662     case MATPRODUCT_AB:
7663     case MATPRODUCT_AtB:
7664     case MATPRODUCT_PtAP:
7665       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7666       break;
7667     default:
7668       break;
7669     }
7670   }
7671   /* fallback to MPIAIJ ops */
7672   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7673   PetscFunctionReturn(PETSC_SUCCESS);
7674 }
7675 
7676 /*
7677    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7678 
7679    n - the number of block indices in cc[]
7680    cc - the block indices (must be large enough to contain the indices)
7681 */
7682 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7683 {
7684   PetscInt        cnt = -1, nidx, j;
7685   const PetscInt *idx;
7686 
7687   PetscFunctionBegin;
7688   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7689   if (nidx) {
7690     cnt     = 0;
7691     cc[cnt] = idx[0] / bs;
7692     for (j = 1; j < nidx; j++) {
7693       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7694     }
7695   }
7696   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7697   *n = cnt + 1;
7698   PetscFunctionReturn(PETSC_SUCCESS);
7699 }
7700 
7701 /*
7702     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7703 
7704     ncollapsed - the number of block indices
7705     collapsed - the block indices (must be large enough to contain the indices)
7706 */
7707 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7708 {
7709   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7710 
7711   PetscFunctionBegin;
7712   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7713   for (i = start + 1; i < start + bs; i++) {
7714     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7715     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7716     cprevtmp = cprev;
7717     cprev    = merged;
7718     merged   = cprevtmp;
7719   }
7720   *ncollapsed = nprev;
7721   if (collapsed) *collapsed = cprev;
7722   PetscFunctionReturn(PETSC_SUCCESS);
7723 }
7724 
7725 /*
7726  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7727 
7728  Input Parameter:
7729  . Amat - matrix
7730  - symmetrize - make the result symmetric
7731  + scale - scale with diagonal
7732 
7733  Output Parameter:
7734  . a_Gmat - output scalar graph >= 0
7735 
7736 */
7737 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat)
7738 {
7739   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7740   MPI_Comm  comm;
7741   Mat       Gmat;
7742   PetscBool ismpiaij, isseqaij;
7743   Mat       a, b, c;
7744   MatType   jtype;
7745 
7746   PetscFunctionBegin;
7747   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7748   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7749   PetscCall(MatGetSize(Amat, &MM, &NN));
7750   PetscCall(MatGetBlockSize(Amat, &bs));
7751   nloc = (Iend - Istart) / bs;
7752 
7753   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7754   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7755   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7756 
7757   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7758   /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
7759      implementation */
7760   if (bs > 1) {
7761     PetscCall(MatGetType(Amat, &jtype));
7762     PetscCall(MatCreate(comm, &Gmat));
7763     PetscCall(MatSetType(Gmat, jtype));
7764     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7765     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7766     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7767       PetscInt  *d_nnz, *o_nnz;
7768       MatScalar *aa, val, *AA;
7769       PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
7770 
7771       if (isseqaij) {
7772         a = Amat;
7773         b = NULL;
7774       } else {
7775         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7776         a             = d->A;
7777         b             = d->B;
7778       }
7779       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7780       PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
7781       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7782         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
7783         const PetscInt *cols1, *cols2;
7784 
7785         for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
7786           PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
7787           nnz[brow / bs] = nc2 / bs;
7788           if (nc2 % bs) ok = 0;
7789           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
7790           for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
7791             PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
7792             if (nc1 != nc2) ok = 0;
7793             else {
7794               for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
7795                 if (cols1[jj] != cols2[jj]) ok = 0;
7796                 if (cols1[jj] % bs != jj % bs) ok = 0;
7797               }
7798             }
7799             PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
7800           }
7801           PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
7802           if (!ok) {
7803             PetscCall(PetscFree2(d_nnz, o_nnz));
7804             PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
7805             goto old_bs;
7806           }
7807         }
7808       }
7809       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7810       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7811       PetscCall(PetscFree2(d_nnz, o_nnz));
7812       PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
7813       // diag
7814       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
7815         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
7816 
7817         ai = aseq->i;
7818         n  = ai[brow + 1] - ai[brow];
7819         aj = aseq->j + ai[brow];
7820         for (PetscInt k = 0; k < n; k += bs) {   // block columns
7821           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
7822           val        = 0;
7823           if (index_size == 0) {
7824             for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
7825               aa = aseq->a + ai[brow + ii] + k;
7826               for (PetscInt jj = 0; jj < bs; jj++) {    // columns in block
7827                 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7828               }
7829             }
7830           } else {                                            // use (index,index) value if provided
7831             for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
7832               PetscInt ii = index[iii];
7833               aa          = aseq->a + ai[brow + ii] + k;
7834               for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block
7835                 PetscInt jj = index[jjj];
7836                 val += PetscAbs(PetscRealPart(aa[jj]));
7837               }
7838             }
7839           }
7840           PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax);
7841           AA[k / bs] = val;
7842         }
7843         grow = Istart / bs + brow / bs;
7844         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES));
7845       }
7846       // off-diag
7847       if (ismpiaij) {
7848         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
7849         const PetscScalar *vals;
7850         const PetscInt    *cols, *garray = aij->garray;
7851 
7852         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
7853         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
7854           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
7855           for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
7856             PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
7857             AA[k / bs] = 0;
7858             AJ[cidx]   = garray[cols[k]] / bs;
7859           }
7860           nc = ncols / bs;
7861           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
7862           if (index_size == 0) {
7863             for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
7864               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7865               for (PetscInt k = 0; k < ncols; k += bs) {
7866                 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block
7867                   PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax);
7868                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7869                 }
7870               }
7871               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7872             }
7873           } else {                                            // use (index,index) value if provided
7874             for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
7875               PetscInt ii = index[iii];
7876               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7877               for (PetscInt k = 0; k < ncols; k += bs) {
7878                 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block
7879                   PetscInt jj = index[jjj];
7880                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7881                 }
7882               }
7883               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7884             }
7885           }
7886           grow = Istart / bs + brow / bs;
7887           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES));
7888         }
7889       }
7890       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7891       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7892       PetscCall(PetscFree2(AA, AJ));
7893     } else {
7894       const PetscScalar *vals;
7895       const PetscInt    *idx;
7896       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
7897     old_bs:
7898       /*
7899        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7900        */
7901       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
7902       PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
7903       if (isseqaij) {
7904         PetscInt max_d_nnz;
7905 
7906         /*
7907          Determine exact preallocation count for (sequential) scalar matrix
7908          */
7909         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
7910         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7911         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7912         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7913         PetscCall(PetscFree3(w0, w1, w2));
7914       } else if (ismpiaij) {
7915         Mat             Daij, Oaij;
7916         const PetscInt *garray;
7917         PetscInt        max_d_nnz;
7918 
7919         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
7920         /*
7921          Determine exact preallocation count for diagonal block portion of scalar matrix
7922          */
7923         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
7924         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7925         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7926         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7927         PetscCall(PetscFree3(w0, w1, w2));
7928         /*
7929          Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
7930          */
7931         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7932           o_nnz[jj] = 0;
7933           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
7934             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7935             o_nnz[jj] += ncols;
7936             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7937           }
7938           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
7939         }
7940       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
7941       /* get scalar copy (norms) of matrix */
7942       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7943       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7944       PetscCall(PetscFree2(d_nnz, o_nnz));
7945       for (Ii = Istart; Ii < Iend; Ii++) {
7946         PetscInt dest_row = Ii / bs;
7947 
7948         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
7949         for (jj = 0; jj < ncols; jj++) {
7950           PetscInt    dest_col = idx[jj] / bs;
7951           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
7952 
7953           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
7954         }
7955         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
7956       }
7957       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7958       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7959     }
7960   } else {
7961     if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
7962     else {
7963       Gmat = Amat;
7964       PetscCall(PetscObjectReference((PetscObject)Gmat));
7965     }
7966     if (isseqaij) {
7967       a = Gmat;
7968       b = NULL;
7969     } else {
7970       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
7971       a             = d->A;
7972       b             = d->B;
7973     }
7974     if (filter >= 0 || scale) {
7975       /* take absolute value of each entry */
7976       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7977         MatInfo      info;
7978         PetscScalar *avals;
7979 
7980         PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
7981         PetscCall(MatSeqAIJGetArray(c, &avals));
7982         for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
7983         PetscCall(MatSeqAIJRestoreArray(c, &avals));
7984       }
7985     }
7986   }
7987   if (symmetrize) {
7988     PetscBool isset, issym;
7989 
7990     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
7991     if (!isset || !issym) {
7992       Mat matTrans;
7993 
7994       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
7995       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
7996       PetscCall(MatDestroy(&matTrans));
7997     }
7998     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
7999   } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
8000   if (scale) {
8001     /* scale c for all diagonal values = 1 or -1 */
8002     Vec diag;
8003 
8004     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
8005     PetscCall(MatGetDiagonal(Gmat, diag));
8006     PetscCall(VecReciprocal(diag));
8007     PetscCall(VecSqrtAbs(diag));
8008     PetscCall(MatDiagonalScale(Gmat, diag, diag));
8009     PetscCall(VecDestroy(&diag));
8010   }
8011   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
8012   if (filter >= 0) {
8013     PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
8014     PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
8015   }
8016   *a_Gmat = Gmat;
8017   PetscFunctionReturn(PETSC_SUCCESS);
8018 }
8019 
8020 /*
8021     Special version for direct calls from Fortran
8022 */
8023 
8024 /* Change these macros so can be used in void function */
8025 /* Identical to PetscCallVoid, except it assigns to *_ierr */
8026 #undef PetscCall
8027 #define PetscCall(...) \
8028   do { \
8029     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
8030     if (PetscUnlikely(ierr_msv_mpiaij)) { \
8031       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
8032       return; \
8033     } \
8034   } while (0)
8035 
8036 #undef SETERRQ
8037 #define SETERRQ(comm, ierr, ...) \
8038   do { \
8039     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
8040     return; \
8041   } while (0)
8042 
8043 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8044   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8045 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8046   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8047 #else
8048 #endif
8049 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
8050 {
8051   Mat         mat = *mmat;
8052   PetscInt    m = *mm, n = *mn;
8053   InsertMode  addv = *maddv;
8054   Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
8055   PetscScalar value;
8056 
8057   MatCheckPreallocated(mat, 1);
8058   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
8059   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
8060   {
8061     PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
8062     PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
8063     PetscBool roworiented = aij->roworiented;
8064 
8065     /* Some Variables required in the macro */
8066     Mat         A     = aij->A;
8067     Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
8068     PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
8069     MatScalar  *aa;
8070     PetscBool   ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
8071     Mat         B                 = aij->B;
8072     Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
8073     PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
8074     MatScalar  *ba;
8075     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
8076      * cannot use "#if defined" inside a macro. */
8077     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
8078 
8079     PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
8080     PetscInt   nonew = a->nonew;
8081     MatScalar *ap1, *ap2;
8082 
8083     PetscFunctionBegin;
8084     PetscCall(MatSeqAIJGetArray(A, &aa));
8085     PetscCall(MatSeqAIJGetArray(B, &ba));
8086     for (i = 0; i < m; i++) {
8087       if (im[i] < 0) continue;
8088       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
8089       if (im[i] >= rstart && im[i] < rend) {
8090         row      = im[i] - rstart;
8091         lastcol1 = -1;
8092         rp1      = aj + ai[row];
8093         ap1      = aa + ai[row];
8094         rmax1    = aimax[row];
8095         nrow1    = ailen[row];
8096         low1     = 0;
8097         high1    = nrow1;
8098         lastcol2 = -1;
8099         rp2      = bj + bi[row];
8100         ap2      = ba + bi[row];
8101         rmax2    = bimax[row];
8102         nrow2    = bilen[row];
8103         low2     = 0;
8104         high2    = nrow2;
8105 
8106         for (j = 0; j < n; j++) {
8107           if (roworiented) value = v[i * n + j];
8108           else value = v[i + j * m];
8109           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
8110           if (in[j] >= cstart && in[j] < cend) {
8111             col = in[j] - cstart;
8112             MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
8113           } else if (in[j] < 0) continue;
8114           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
8115             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
8116           } else {
8117             if (mat->was_assembled) {
8118               if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
8119 #if defined(PETSC_USE_CTABLE)
8120               PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
8121               col--;
8122 #else
8123               col = aij->colmap[in[j]] - 1;
8124 #endif
8125               if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) {
8126                 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
8127                 col = in[j];
8128                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
8129                 B        = aij->B;
8130                 b        = (Mat_SeqAIJ *)B->data;
8131                 bimax    = b->imax;
8132                 bi       = b->i;
8133                 bilen    = b->ilen;
8134                 bj       = b->j;
8135                 rp2      = bj + bi[row];
8136                 ap2      = ba + bi[row];
8137                 rmax2    = bimax[row];
8138                 nrow2    = bilen[row];
8139                 low2     = 0;
8140                 high2    = nrow2;
8141                 bm       = aij->B->rmap->n;
8142                 ba       = b->a;
8143                 inserted = PETSC_FALSE;
8144               }
8145             } else col = in[j];
8146             MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
8147           }
8148         }
8149       } else if (!aij->donotstash) {
8150         if (roworiented) {
8151           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8152         } else {
8153           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8154         }
8155       }
8156     }
8157     PetscCall(MatSeqAIJRestoreArray(A, &aa));
8158     PetscCall(MatSeqAIJRestoreArray(B, &ba));
8159   }
8160   PetscFunctionReturnVoid();
8161 }
8162 
8163 /* Undefining these here since they were redefined from their original definition above! No
8164  * other PETSc functions should be defined past this point, as it is impossible to recover the
8165  * original definitions */
8166 #undef PetscCall
8167 #undef SETERRQ
8168