xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision eba205da1f2bebf3e4e739bb621f628fb5831709)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
10 #define TYPE AIJ
11 #define TYPE_AIJ
12 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
13 #undef TYPE
14 #undef TYPE_AIJ
15 
/* Frees all internal data of an MPIAIJ matrix — the sequential diagonal (A) and
   off-diagonal (B) blocks, the column map, the gather scatter and work arrays —
   without freeing the Mat header itself.  Shared by MatDestroy_MPIAIJ() and
   MatResetHash_MPIAIJ(). */
static PetscErrorCode MatReset_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A)); /* diagonal portion (local rows x local columns) */
  PetscCall(MatDestroy(&aij->B)); /* off-diagonal portion */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray)); /* global indices of B's columns */
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
  PetscCall(PetscFree(aij->ld));
  PetscFunctionReturn(PETSC_SUCCESS);
}
38 
/* Discards all matrix data and returns the matrix to the unassembled, hash-based
   insertion state.  NOTE(review): MatSetUp_MPI_Hash() is assumed to recreate
   aij->A and aij->B, since MatReset_MPIAIJ() destroys them — confirm. */
static PetscErrorCode MatResetHash_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  /* Save the nonzero states of the component matrices because those are what are used to determine
    the nonzero state of mat */
  PetscObjectState Astate = aij->A->nonzerostate, Bstate = aij->B->nonzerostate;

  PetscFunctionBegin;
  PetscCall(MatReset_MPIAIJ(mat));
  PetscCall(MatSetUp_MPI_Hash(mat));
  /* bump the saved states so the reset matrix reports a fresh nonzero state */
  aij->A->nonzerostate = ++Astate, aij->B->nonzerostate = ++Bstate;
  PetscFunctionReturn(PETSC_SUCCESS);
}
52 
53 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
54 {
55   PetscFunctionBegin;
56   PetscCall(MatReset_MPIAIJ(mat));
57 
58   PetscCall(PetscFree(mat->data));
59 
60   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
61   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
62 
63   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
64   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
65   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
66   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
69   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetHash_C", NULL));
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
71   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
73   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
74 #if defined(PETSC_HAVE_CUDA)
75   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
76 #endif
77 #if defined(PETSC_HAVE_HIP)
78   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
79 #endif
80 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
81   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
82 #endif
83   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
84 #if defined(PETSC_HAVE_ELEMENTAL)
85   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
86 #endif
87 #if defined(PETSC_HAVE_SCALAPACK)
88   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
89 #endif
90 #if defined(PETSC_HAVE_HYPRE)
91   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
92   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
93 #endif
94   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
95   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
96   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
97   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
98   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
99   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
100 #if defined(PETSC_HAVE_MKL_SPARSE)
101   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
102 #endif
103   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
104   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
105   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
106   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
107   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
108   PetscFunctionReturn(PETSC_SUCCESS);
109 }
110 
/* Returns the compressed-row (ia,ja) description of a merged, purely sequential copy
   of the parallel matrix.  The temporary matrix B is composed with A under the key
   "MatGetRowIJ_MPIAIJ" so MatRestoreRowIJ_MPIAIJ() can retrieve it; the composition
   holds its own reference, so the MatDestroy() below only drops the local one. */
static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
  PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}
122 
/* Returns the arrays obtained with MatGetRowIJ_MPIAIJ() and releases the temporary
   sequential matrix by removing its composed reference from A. */
static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
  PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL)); /* drops the last reference to B */
  PetscFunctionReturn(PETSC_SUCCESS);
}
133 
134 /*MC
135    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
136 
   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
138    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
139   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
140   for communicators controlling multiple processes.  It is recommended that you call both of
141   the above preallocation routines for simplicity.
142 
143    Options Database Key:
144 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
145 
  Level: beginner

  Developer Note:
  Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`; the type also automatically
  switches over to use inodes when enough exist.
151 
152 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
153 M*/
154 
155 /*MC
156    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
157 
158    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
159    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
160    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
161   for communicators controlling multiple processes.  It is recommended that you call both of
162   the above preallocation routines for simplicity.
163 
164    Options Database Key:
165 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`
166 
167   Level: beginner
168 
169 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
170 M*/
171 
/* Binds (flg == PETSC_TRUE) or unbinds the matrix and its sub-objects to/from the CPU.
   The boundtocpu flag is only recorded when a device backend is configured. */
static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix. */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
  PetscFunctionReturn(PETSC_SUCCESS);
}
190 
191 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
192 {
193   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
194 
195   PetscFunctionBegin;
196   if (mat->A) {
197     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
198     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
199   }
200   PetscFunctionReturn(PETSC_SUCCESS);
201 }
202 
/* Builds an index set of the locally owned rows (in global numbering) that contain at
   least one stored value different from zero, in either the diagonal (A) or
   off-diagonal (B) block.  If no process has a zero row, *keptrows is left NULL.
   Two passes: first count the zero rows (cnt), then fill the surviving row indices. */
static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  /* pass 1: cnt = number of local rows with no nonzero value at all */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i]; /* stored entries of row i in the diagonal block */
    nb = ib[i + 1] - ib[i]; /* stored entries of row i in the off-diagonal block */
    if (!na && !nb) {
      cnt++; /* structurally empty row counts as zero */
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1; /* row has a nonzero; not counted */
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++; /* all stored values were exactly zero */
  ok1:;
  }
  /* n0rows = global number of zero rows; if none anywhere, return with *keptrows == NULL */
  PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  /* pass 2: collect the m - cnt rows that do contain a nonzero, shifted to global numbering */
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(PETSC_SUCCESS);
}
271 
272 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
273 {
274   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
275   PetscBool   cong;
276 
277   PetscFunctionBegin;
278   PetscCall(MatHasCongruentLayouts(Y, &cong));
279   if (Y->assembled && cong) {
280     PetscCall(MatDiagonalSet(aij->A, D, is));
281   } else {
282     PetscCall(MatDiagonalSet_Default(Y, D, is));
283   }
284   PetscFunctionReturn(PETSC_SUCCESS);
285 }
286 
287 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
288 {
289   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
290   PetscInt    i, rstart, nrows, *rows;
291 
292   PetscFunctionBegin;
293   *zrows = NULL;
294   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
295   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
296   for (i = 0; i < nrows; i++) rows[i] += rstart;
297   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
298   PetscFunctionReturn(PETSC_SUCCESS);
299 }
300 
/* Computes a per-column reduction (norm, sum, or mean of real/imaginary parts) over
   all rows of the parallel matrix.  Each process accumulates into a length-N work
   array indexed by *global* column (diagonal-block entries via the cstart offset,
   off-diagonal entries via garray), then the arrays are combined with an Allreduce.
   `reductions` must have room for all N global columns. */
static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;
  PetscMPIInt        in;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work)); /* zero-initialized accumulator, one slot per global column */
  /* NOTE(review): the paired Get/RestoreArrayRead calls appear to exist only to make sure
     the host-side a_aij->a / b_aij->a arrays accessed below are up to date — confirm for
     device backends */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  PetscCall(PetscMPIIntCast(n, &in));
  /* MAX combines per-column maxima; every other reduction combines by summation */
  if (type == NORM_INFINITY) {
    PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); /* finish the 2-norm */
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m; /* mean = sum / global number of rows */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
348 
/* Builds an index set (global numbering) of the locally owned rows that have an entry
   outside the block diagonal: the union of rows flagged by the sequential diagonal
   block (sis) and rows with any entry in the off-diagonal block (gis), sorted with
   duplicates removed, then shifted by the ownership offset. */
static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  IS              sis, gis;
  const PetscInt *isis, *igis;
  PetscInt        n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis)); /* off-block-diagonal rows within the diagonal block (local indices) */
  PetscCall(MatFindNonzeroRows(a->B, &gis));             /* any entry in B is off the block diagonal by construction */
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  /* concatenate the two lists, then sort and deduplicate in place (n is updated) */
  PetscCall(PetscMalloc1(ngis + nsis, &iis));
  PetscCall(PetscArraycpy(iis, igis, ngis));
  PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart; /* local -> global row numbers */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(PETSC_SUCCESS);
}
379 
380 /*
381   Local utility routine that creates a mapping from the global column
382 number to the local number in the off-diagonal part of the local
383 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it it is not scalable (each processor
has an order N integer array) but is fast to access.
386 */
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n   = aij->B->cmap->n, i; /* number of off-diagonal (ghost) columns */

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* keys and values are both stored shifted by +1 so that a lookup miss (0) is distinguishable */
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  /* dense array over all N global columns; calloc'd zeros mean "column not present in B" */
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1; /* value shifted by +1; caller subtracts 1 */
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
403 
/*
  MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) -

  Inserts (INSERT_VALUES) or adds (ADD_VALUES) `value` at local (row, col) of the
  diagonal block A.  Performs a bounded binary search on the cached row pointers
  (rp1 = column indices, ap1 = values) using the persistent search state
  low1/high1/lastcol1, and reallocates the row via MatSeqXAIJReallocateAIJ() when a
  new nonzero must be inserted and a->nonew permits it.  orow/ocol are the global
  indices, used only in error messages.  Relies on many locals of
  MatSetValues_MPIAIJ() (nrow1, rmax1, ailen, nonew, ignorezeroentries, N, t, _i, ...)
  and must only be expanded inside that function.
*/
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)
448 
/*
  MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) -

  Counterpart of MatSetValues_SeqAIJ_A_Private() for the off-diagonal block B, using
  the rp2/ap2 row pointers and the low2/high2/lastcol2 search state.  Note: unlike the
  A variant, the zero-value skip does not require row != col, since no true diagonal
  entry can land in B.  Must only be expanded inside MatSetValues_MPIAIJ().
*/
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  } while (0)
492 
/* Overwrites all stored values of one locally owned row.  `row` is a global index;
   `v` must supply the row's values ordered by global column: first the off-diagonal
   entries left of the diagonal block, then the diagonal-block entries, then the
   remaining off-diagonal entries.  Only values are written — the sparsity pattern is
   unchanged. */
static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray                         = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL)); /* diag = rstart, first locally owned row */
  row = row - diag;                                /* convert to local row index */
  /* l = number of B entries whose global column lies left of the diagonal block
     (B stores no columns inside [cstart, cend), so comparing against diag works
     for square matrices where cstart == rstart) */
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l)); /* first l values of v: left off-diagonal part */
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row])); /* next values of v */
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
530 
/*
  MatSetValues implementation for MPIAIJ: inserts or adds the logically dense m-by-n
  block of values v at rows im[] and columns in[] (global indices; negative indices
  are ignored).

  Locally owned rows go directly into the sequential diagonal (A) or off-diagonal (B)
  block via the MatSetValues_SeqAIJ_{A,B}_Private() macros, which use the many cached
  locals declared below.  Off-process rows are stored in the stash and communicated at
  assembly time.  If the matrix was previously assembled and a column unknown to B is
  inserted, B is converted back to global column numbering (MatDisAssemble_MPIAIJ) and
  the cached B pointers are refreshed.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A     = aij->A;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B                 = aij->B;
  Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative row index: skip by convention */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) { /* row is owned by this process */
      row      = im[i] - rstart;
      /* cache row pointers and binary-search state for the A-block macro */
      lastcol1 = -1;
      rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
      ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      /* same for the B-block macro */
      lastcol2 = -1;
      rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
      ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) { /* column lands in the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue; /* negative column index: skip by convention */
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            /* B uses compacted local column numbering after assembly; translate via colmap */
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));  /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = PetscSafePointerPlusOffset(bj, bi[row]);
              ap2   = PetscSafePointerPlusOffset(ba, bi[row]);
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              /* new nonzero in B but insertions are disallowed: info message or hard error depending on nonew */
              if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* before first assembly B is indexed by global columns */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else { /* off-process row: stash for communication during assembly */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}
639 
640 /*
641     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
642     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
644 */
645 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
646 {
647   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
648   Mat         A      = aij->A; /* diagonal part of the matrix */
649   Mat         B      = aij->B; /* off-diagonal part of the matrix */
650   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
651   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
652   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
653   PetscInt   *ailen = a->ilen, *aj = a->j;
654   PetscInt   *bilen = b->ilen, *bj = b->j;
655   PetscInt    am          = aij->A->rmap->n, j;
656   PetscInt    diag_so_far = 0, dnz;
657   PetscInt    offd_so_far = 0, onz;
658 
659   PetscFunctionBegin;
660   /* Iterate over all rows of the matrix */
661   for (j = 0; j < am; j++) {
662     dnz = onz = 0;
663     /*  Iterate over all non-zero columns of the current row */
664     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
665       /* If column is in the diagonal */
666       if (mat_j[col] >= cstart && mat_j[col] < cend) {
667         aj[diag_so_far++] = mat_j[col] - cstart;
668         dnz++;
669       } else { /* off-diagonal entries */
670         bj[offd_so_far++] = mat_j[col];
671         onz++;
672       }
673     }
674     ailen[j] = dnz;
675     bilen[j] = onz;
676   }
677   PetscFunctionReturn(PETSC_SUCCESS);
678 }
679 
680 /*
681     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
682     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
684     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
685     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
686 */
687 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
688 {
689   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
690   Mat          A    = aij->A; /* diagonal part of the matrix */
691   Mat          B    = aij->B; /* off-diagonal part of the matrix */
692   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
693   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
694   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
695   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
696   PetscInt    *ailen = a->ilen, *aj = a->j;
697   PetscInt    *bilen = b->ilen, *bj = b->j;
698   PetscInt     am          = aij->A->rmap->n, j;
699   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
700   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
701   PetscScalar *aa = a->a, *ba = b->a;
702 
703   PetscFunctionBegin;
704   /* Iterate over all rows of the matrix */
705   for (j = 0; j < am; j++) {
706     dnz_row = onz_row = 0;
707     rowstart_offd     = full_offd_i[j];
708     rowstart_diag     = full_diag_i[j];
709     /*  Iterate over all non-zero columns of the current row */
710     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
711       /* If column is in the diagonal */
712       if (mat_j[col] >= cstart && mat_j[col] < cend) {
713         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
714         aa[rowstart_diag + dnz_row] = mat_a[col];
715         dnz_row++;
716       } else { /* off-diagonal entries */
717         bj[rowstart_offd + onz_row] = mat_j[col];
718         ba[rowstart_offd + onz_row] = mat_a[col];
719         onz_row++;
720       }
721     }
722     ailen[j] = dnz_row;
723     bilen[j] = onz_row;
724   }
725   PetscFunctionReturn(PETSC_SUCCESS);
726 }
727 
/* Gather an m-by-n dense block of values v[i*n+j] = mat(idxm[i], idxn[j]) (global indices).
   Only locally owned rows may be requested; columns may be anywhere, with off-diagonal
   entries resolved through the colmap/garray compressed column numbering of aij->B.
   Negative row or column indices are silently skipped (the usual PETSc convention). */
static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
    row = idxm[i] - rstart; /* local row index, valid for both aij->A and aij->B */
    for (j = 0; j < n; j++) {
      if (idxn[j] < 0) continue; /* negative column */
      PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
      if (idxn[j] >= cstart && idxn[j] < cend) {
        /* column owned by this rank: read from the diagonal block */
        col = idxn[j] - cstart;
        PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
      } else {
        /* off-diagonal column: map global column to B's compressed local numbering */
        if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
        PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
        col--; /* colmap stores indices shifted by one so 0 can mean "absent" */
#else
        col = aij->colmap[idxn[j]] - 1;
#endif
        /* a column not present in this rank's off-diagonal pattern reads as zero */
        if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
        else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
761 
762 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
763 {
764   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
765   PetscInt    nstash, reallocs;
766 
767   PetscFunctionBegin;
768   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
769 
770   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
771   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
772   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
773   PetscFunctionReturn(PETSC_SUCCESS);
774 }
775 
/* Finish assembly of an MPIAIJ matrix: drain and insert stashed off-process entries,
   assemble the diagonal (A) and off-diagonal (B) sequential blocks, handle disassembly
   detected on any rank, set up the multiply scatter on first final assembly, and update
   the global nonzero state. */
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* receive stashed (row, col, val) messages until none remain */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
    }
  }
  /* first final assembly: build the column map, lvec, and Mvctx used by MatMult */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  /* cached MatGetRow() work arrays are invalidated by assembly */
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  /* cached diagonal is stale after assembly */
  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
854 
855 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
856 {
857   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
858 
859   PetscFunctionBegin;
860   PetscCall(MatZeroEntries(l->A));
861   PetscCall(MatZeroEntries(l->B));
862   PetscFunctionReturn(PETSC_SUCCESS);
863 }
864 
/* Zero the listed rows (global indices; off-process rows are mapped to their owners),
   optionally placing 'diag' on the diagonal of each zeroed row; if both x and b are
   given, the right-hand side is fixed so that b = diag*x in the zeroed rows. */
static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
  PetscInt   *lrows;
  PetscInt    r, len;
  PetscBool   cong;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  if (diag != 0.0 && cong) {
    /* congruent layouts: the diagonal entry lies in the diagonal block, so A takes diag and B takes 0 */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    nnwA, nnwB; /* saved 'nonew' flags, restored after the diagonal insertion */
    PetscBool   nnzA, nnzB; /* keepnonzeropattern flags of the two blocks */

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0; /* temporarily allow new nonzeros so MatSetValues below can insert the diagonal */
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue; /* rows beyond the last column have no diagonal entry */
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
935 
/* Zero the listed rows AND the corresponding columns (global indices, possibly owned by
   other ranks), placing 'diag' on the diagonal of zeroed rows of the diagonal block;
   if x and b are given, b is updated to account for the eliminated columns.  Zeroed rows
   are identified across ranks with a PetscSF reduction, and zeroed columns of the
   off-diagonal block are found by scattering a 0/1 row mask through l->Mvctx. */
static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscInt           n = A->rmap->n;
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off-diagonal part of matrix */
  /* build a mask vector with 1 in every zeroed row, then scatter it to the ghost layout of B */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    /* bring the ghost values of x needed to correct b for the eliminated columns */
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off-diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    /* compressed-row storage: only rows with nonzeros are visited, ridx maps back to local rows */
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* column is eliminated: move its contribution to the right-hand side, then zero it */
          if (b) bb[*ridx] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)l->A->data)->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1053 
1054 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1055 {
1056   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1057   PetscInt    nt;
1058   VecScatter  Mvctx = a->Mvctx;
1059 
1060   PetscFunctionBegin;
1061   PetscCall(VecGetLocalSize(xx, &nt));
1062   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
1063   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1064   PetscUseTypeMethod(a->A, mult, xx, yy);
1065   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1066   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1067   PetscFunctionReturn(PETSC_SUCCESS);
1068 }
1069 
1070 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1071 {
1072   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1073 
1074   PetscFunctionBegin;
1075   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1076   PetscFunctionReturn(PETSC_SUCCESS);
1077 }
1078 
1079 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1080 {
1081   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1082   VecScatter  Mvctx = a->Mvctx;
1083 
1084   PetscFunctionBegin;
1085   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1086   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1087   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1088   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1089   PetscFunctionReturn(PETSC_SUCCESS);
1090 }
1091 
1092 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1093 {
1094   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1095 
1096   PetscFunctionBegin;
1097   /* do nondiagonal part */
1098   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1099   /* do local part */
1100   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1101   /* add partial results together */
1102   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1103   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1104   PetscFunctionReturn(PETSC_SUCCESS);
1105 }
1106 
/* Test whether Bmat is the transpose of Amat within tolerance tol.  First a cheap test
   on the diagonal blocks (reduced across ranks); if that passes and there is more than
   one rank, the off-diagonal coupling is gathered with MatCreateSubMatrices and tested. */
static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
  Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme;
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  /* notme = all global rows/columns outside this rank's ownership range
     NOTE(review): the allocation is sized with N but the second loop runs to M;
     this presumably relies on the matrix being square (M == N) — confirm */
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  /* gather A(Me, Notme) and B(Notme, Me); they must be mutual transposes */
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1146 
1147 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1148 {
1149   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1150 
1151   PetscFunctionBegin;
1152   /* do nondiagonal part */
1153   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1154   /* do local part */
1155   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1156   /* add partial results together */
1157   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1158   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1159   PetscFunctionReturn(PETSC_SUCCESS);
1160 }
1161 
1162 /*
1163   This only works correctly for square matrices where the subblock A->A is the
1164    diagonal block
1165 */
1166 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1167 {
1168   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1169 
1170   PetscFunctionBegin;
1171   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1172   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1173   PetscCall(MatGetDiagonal(a->A, v));
1174   PetscFunctionReturn(PETSC_SUCCESS);
1175 }
1176 
1177 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1178 {
1179   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1180 
1181   PetscFunctionBegin;
1182   PetscCall(MatScale(a->A, aa));
1183   PetscCall(MatScale(a->B, aa));
1184   PetscFunctionReturn(PETSC_SUCCESS);
1185 }
1186 
1187 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1188 {
1189   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1190   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1191   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1192   const PetscInt    *garray = aij->garray;
1193   const PetscScalar *aa, *ba;
1194   PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
1195   PetscInt64         nz, hnz;
1196   PetscInt          *rowlens;
1197   PetscInt          *colidxs;
1198   PetscScalar       *matvals;
1199   PetscMPIInt        rank;
1200 
1201   PetscFunctionBegin;
1202   PetscCall(PetscViewerSetUp(viewer));
1203 
1204   M  = mat->rmap->N;
1205   N  = mat->cmap->N;
1206   m  = mat->rmap->n;
1207   rs = mat->rmap->rstart;
1208   cs = mat->cmap->rstart;
1209   nz = A->nz + B->nz;
1210 
1211   /* write matrix header */
1212   header[0] = MAT_FILE_CLASSID;
1213   header[1] = M;
1214   header[2] = N;
1215   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1216   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1217   if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
1218   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1219 
1220   /* fill in and store row lengths  */
1221   PetscCall(PetscMalloc1(m, &rowlens));
1222   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1223   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1224   PetscCall(PetscFree(rowlens));
1225 
1226   /* fill in and store column indices */
1227   PetscCall(PetscMalloc1(nz, &colidxs));
1228   for (cnt = 0, i = 0; i < m; i++) {
1229     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1230       if (garray[B->j[jb]] > cs) break;
1231       colidxs[cnt++] = garray[B->j[jb]];
1232     }
1233     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1234     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1235   }
1236   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1237   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1238   PetscCall(PetscFree(colidxs));
1239 
1240   /* fill in and store nonzero values */
1241   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1242   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1243   PetscCall(PetscMalloc1(nz, &matvals));
1244   for (cnt = 0, i = 0; i < m; i++) {
1245     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1246       if (garray[B->j[jb]] > cs) break;
1247       matvals[cnt++] = ba[jb];
1248     }
1249     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1250     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1251   }
1252   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1253   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1254   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1255   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1256   PetscCall(PetscFree(matvals));
1257 
1258   /* write block size option to the viewer's .info file */
1259   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1260   PetscFunctionReturn(PETSC_SUCCESS);
1261 }
1262 
#include <petscdraw.h>
/* View an MPIAIJ matrix on ASCII, draw, binary, or socket viewers.  Special ASCII
   formats (load balance, info, info-detail) are handled per-format; otherwise the
   whole matrix is gathered onto rank 0 and viewed there. */
static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt       rank = aij->rank, size = aij->size;
  PetscBool         isdraw, iascii, isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max local nonzero counts across ranks */
      PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank synchronized report of local sizes, nonzeros, and I-node usage */
      MatInfo   info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     info.memory));
      } else {
        PetscCall(
          PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    /* other ASCII formats fall through to the gather-on-rank-0 path below */
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (iascii && size == 1) {
    /* NOTE(review): this branch appears unreachable — the iascii case is already
       captured by the first if above; confirm before relying on it */
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow, iscol;

    /* rank 0 requests all rows/columns, every other rank requests none */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
    /*  The commented code uses MatCreateSubMatrices instead */
    /*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
    if (rank == 0) {
       PetscCall(PetscObjectReference((PetscObject)AA[0]));
       A    = AA[0];
       Av   = AA[0];
    }
    PetscCall(MatDestroySubMatrices(1,&AA));
*/
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqAIJ(Av, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1388 
1389 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1390 {
1391   PetscBool iascii, isdraw, issocket, isbinary;
1392 
1393   PetscFunctionBegin;
1394   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1395   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1396   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1397   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1398   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1399   PetscFunctionReturn(PETSC_SUCCESS);
1400 }
1401 
/*
  MatSOR_MPIAIJ - SOR/SSOR relaxation for an MPIAIJ matrix.

  Only the "local" sweep variants (and Eisenstat) are supported in parallel:
  each iteration applies the sequential SOR kernel of the diagonal block A,
  after folding the off-diagonal block B times the latest ghosted iterate
  into the right-hand side.  A true global-ordering parallel SOR errors out.
*/
static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec         bb1 = NULL; /* work vector holding the modified rhs bb - B*x */
  PetscBool   hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    /* pure triangular application; delegate directly to the diagonal block */
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* bb1 is needed unless a single sweep with zero initial guess suffices;
     note `~flag & SOR_ZERO_INITIAL_GUESS` is true when that bit is NOT set */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep needs no ghost update since x starts at zero */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      /* refresh ghost values of x before recomputing the rhs correction */
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    if (!mat->diag) {
      /* lazily cache the global diagonal; reused on subsequent calls */
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    /* bb1 = bb + ((omega-2)/omega) * D*x  (Eisenstat trick rhs) */
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  /* propagate any zero-pivot style failure detected by the diagonal block */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1498 
/*
  MatPermute_MPIAIJ - Create B = P*A*Q for permutations described by index sets.

  rowp/colp give, for each local row/column, the global index it should move
  to.  The permutations are inverted with PetscSF reductions so each process
  learns the destinations of its own rows/columns, preallocation counts are
  communicated to the destination rows, and the values are inserted with
  MatSetValues.
*/
static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL; /* NOTE(review): never set non-NULL in this function; the ISDestroy below looks unreachable — confirm */
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  /* work is sized max(m,n) since it is reused for both row and column passes */
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* Count diagonal/off-diagonal nonzeros per destination row for preallocation */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* ship the counts to the processes that own the destination rows */
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1604 
1605 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1606 {
1607   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1608 
1609   PetscFunctionBegin;
1610   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1611   if (ghosts) *ghosts = aij->garray;
1612   PetscFunctionReturn(PETSC_SUCCESS);
1613 }
1614 
1615 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1616 {
1617   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1618   Mat            A = mat->A, B = mat->B;
1619   PetscLogDouble isend[5], irecv[5];
1620 
1621   PetscFunctionBegin;
1622   info->block_size = 1.0;
1623   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1624 
1625   isend[0] = info->nz_used;
1626   isend[1] = info->nz_allocated;
1627   isend[2] = info->nz_unneeded;
1628   isend[3] = info->memory;
1629   isend[4] = info->mallocs;
1630 
1631   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1632 
1633   isend[0] += info->nz_used;
1634   isend[1] += info->nz_allocated;
1635   isend[2] += info->nz_unneeded;
1636   isend[3] += info->memory;
1637   isend[4] += info->mallocs;
1638   if (flag == MAT_LOCAL) {
1639     info->nz_used      = isend[0];
1640     info->nz_allocated = isend[1];
1641     info->nz_unneeded  = isend[2];
1642     info->memory       = isend[3];
1643     info->mallocs      = isend[4];
1644   } else if (flag == MAT_GLOBAL_MAX) {
1645     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1646 
1647     info->nz_used      = irecv[0];
1648     info->nz_allocated = irecv[1];
1649     info->nz_unneeded  = irecv[2];
1650     info->memory       = irecv[3];
1651     info->mallocs      = irecv[4];
1652   } else if (flag == MAT_GLOBAL_SUM) {
1653     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1654 
1655     info->nz_used      = irecv[0];
1656     info->nz_allocated = irecv[1];
1657     info->nz_unneeded  = irecv[2];
1658     info->memory       = irecv[3];
1659     info->mallocs      = irecv[4];
1660   }
1661   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1662   info->fill_ratio_needed = 0;
1663   info->factor_mallocs    = 0;
1664   PetscFunctionReturn(PETSC_SUCCESS);
1665 }
1666 
/*
  MatSetOption_MPIAIJ - Apply a matrix option to an MPIAIJ matrix.

  Most options are simply forwarded to both sequential blocks A and B;
  a few only toggle flags on the parallel wrapper, and symmetry-type
  options are consumed by the generic MatSetOption() before reaching here.
*/
PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* options that affect the nonzero structure handling of both blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    /* remember orientation for MatSetValues on the parallel object too */
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* skip stashing/communication of entries destined for other ranks */
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1720 
/*
  MatGetRow_MPIAIJ - Return one locally owned row with global column indices.

  The row is assembled by merging the off-diagonal block B (columns mapped
  back to global indices via garray) around the diagonal block A so that the
  output is sorted by increasing global column, assuming each block stores
  its row sorted.  Results live in per-matrix scratch arrays, so only one row
  may be "gotten" at a time (guarded by getrowactive).
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
  PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    max = 1, tmp;
    for (i = 0; i < matin->rmap->n; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  /* pass NULL for whichever outputs the caller did not request */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1; /* imark = number of B entries whose global column precedes the diagonal block */
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying values */
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1804 
1805 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1806 {
1807   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1808 
1809   PetscFunctionBegin;
1810   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1811   aij->getrowactive = PETSC_FALSE;
1812   PetscFunctionReturn(PETSC_SUCCESS);
1813 }
1814 
/*
  MatNorm_MPIAIJ - Frobenius, one, or infinity norm of an MPIAIJ matrix.

  Walks the raw CSR value arrays of the diagonal (A) and off-diagonal (B)
  blocks directly and combines per-process partial results with an
  allreduce.  On a single process the work is delegated to the SeqAIJ norm.
*/
static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
  PetscInt         i, j, cstart = mat->cmap->rstart;
  PetscReal        sum = 0.0;
  const MatScalar *v, *amata, *bmata;
  PetscMPIInt      iN;

  PetscFunctionBegin;
  if (aij->size == 1) {
    PetscCall(MatNorm(aij->A, type, norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
    if (type == NORM_FROBENIUS) {
      /* sum of |a_ij|^2 over both blocks, then global sum and sqrt */
      v = amata;
      for (i = 0; i < amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v = bmata;
      for (i = 0; i < bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      /* accumulate |a_ij| per global column (tmp), reduce, take the max */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = aij->garray;
      PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
      *norm = 0.0;
      v     = amata;
      jj    = amat->j;
      for (j = 0; j < amat->nz; j++) {
        /* diagonal-block columns are local, shift by cstart to get global */
        tmp[cstart + *jj++] += PetscAbsScalar(*v);
        v++;
      }
      v  = bmata;
      jj = bmat->j;
      for (j = 0; j < bmat->nz; j++) {
        /* off-diagonal columns are compressed; garray maps them to global */
        tmp[garray[*jj++]] += PetscAbsScalar(*v);
        v++;
      }
      PetscCall(PetscMPIIntCast(mat->cmap->N, &iN));
      PetscCallMPI(MPIU_Allreduce(tmp, tmp2, iN, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j = 0; j < aij->A->rmap->n; j++) {
        v   = PetscSafePointerPlusOffset(amata, amat->i[j]);
        sum = 0.0;
        for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        v = PetscSafePointerPlusOffset(bmata, bmat->i[j]);
        for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1894 
/*
  MatTranspose_MPIAIJ - Form the transpose of an MPIAIJ matrix.

  The diagonal block is transposed locally (fast path, no MatSetValues);
  entries of the off-diagonal block are inserted as columns of the result
  via MatSetValues, which communicates them to the owning processes.
  Supports MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX, and in-place transpose
  (reuse with *matout == A, finished by MatHeaderMerge).
*/
static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
  Mat              B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  ma = A->rmap->n;
  na = A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt            *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode         *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    /* transpose has swapped sizes: local na x ma, global N x M */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    B = *matout;
    /* reused matrix must already have the transposed pattern */
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ *)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];

  /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  /* translate compressed local column indices of B to global indices */
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    /* each row of B becomes (part of) a column of the transpose */
    ncol = bi[i + 1] - bi[i];
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    if (pbv) pbv += ncol;
    if (cols_tmp) cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place: replace A's guts with B's (MAT_INPLACE_MATRIX path) */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1988 
/*
  MatDiagonalScale_MPIAIJ - Compute mat = diag(ll) * mat * diag(rr).

  Left scaling is purely local (rows are owned).  Right scaling of the
  off-diagonal block needs the ghosted entries of rr, so the scatter of rr
  into lvec is started first and completed only after the local scaling
  work, overlapping communication with computation.
*/
static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         a = aij->A, b = aij->B;
  PetscInt    s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    /* left-scale the off-diagonal block (row scaling is local) */
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale  the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2018 
2019 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2020 {
2021   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2022 
2023   PetscFunctionBegin;
2024   PetscCall(MatSetUnfactored(a->A));
2025   PetscFunctionReturn(PETSC_SUCCESS);
2026 }
2027 
2028 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2029 {
2030   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2031   Mat         a, b, c, d;
2032   PetscBool   flg;
2033 
2034   PetscFunctionBegin;
2035   a = matA->A;
2036   b = matA->B;
2037   c = matB->A;
2038   d = matB->B;
2039 
2040   PetscCall(MatEqual(a, c, &flg));
2041   if (flg) PetscCall(MatEqual(b, d, &flg));
2042   PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2043   PetscFunctionReturn(PETSC_SUCCESS);
2044 }
2045 
2046 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2047 {
2048   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2049   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2050 
2051   PetscFunctionBegin;
2052   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2053   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2054     /* because of the column compression in the off-processor part of the matrix a->B,
2055        the number of columns in a->B and b->B may be different, hence we cannot call
2056        the MatCopy() directly on the two parts. If need be, we can provide a more
2057        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2058        then copying the submatrices */
2059     PetscCall(MatCopy_Basic(A, B, str));
2060   } else {
2061     PetscCall(MatCopy(a->A, b->A, str));
2062     PetscCall(MatCopy(a->B, b->B, str));
2063   }
2064   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2065   PetscFunctionReturn(PETSC_SUCCESS);
2066 }
2067 
2068 /*
2069    Computes the number of nonzeros per row needed for preallocation when X and Y
2070    have different nonzero structure.
2071 */
2072 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2073 {
2074   PetscInt i, j, k, nzx, nzy;
2075 
2076   PetscFunctionBegin;
2077   /* Set the number of nonzeros in the new matrix */
2078   for (i = 0; i < m; i++) {
2079     const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]);
2080     nzx    = xi[i + 1] - xi[i];
2081     nzy    = yi[i + 1] - yi[i];
2082     nnz[i] = 0;
2083     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2084       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2085       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2086       nnz[i]++;
2087     }
2088     for (; k < nzy; k++) nnz[i]++;
2089   }
2090   PetscFunctionReturn(PETSC_SUCCESS);
2091 }
2092 
2093 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2094 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2095 {
2096   PetscInt    m = Y->rmap->N;
2097   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2098   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2099 
2100   PetscFunctionBegin;
2101   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2102   PetscFunctionReturn(PETSC_SUCCESS);
2103 }
2104 
/* Y = Y + a*X. For SAME_NONZERO_PATTERN the diagonal/off-diagonal blocks are combined
   directly; for SUBSET the generic in-place kernel is used; otherwise a new matrix with
   the merged nonzero pattern is preallocated, filled, and swapped into Y. */
static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    /* blocks have identical patterns, so the seq-level AXPY applies blockwise */
    PetscCall(MatAXPY(yy->A, a, xx->A, str));
    PetscCall(MatAXPY(yy->B, a, xx->B, str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o;

    /* build a fresh matrix with room for the union of both patterns */
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
    PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
    PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
    /* off-diagonal blocks use compressed columns, so pass the garray maps */
    PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    /* replace Y's innards with B's while keeping the user's Y handle valid */
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2135 
2136 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2137 
2138 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2139 {
2140   PetscFunctionBegin;
2141   if (PetscDefined(USE_COMPLEX)) {
2142     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2143 
2144     PetscCall(MatConjugate_SeqAIJ(aij->A));
2145     PetscCall(MatConjugate_SeqAIJ(aij->B));
2146   }
2147   PetscFunctionReturn(PETSC_SUCCESS);
2148 }
2149 
2150 static PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2151 {
2152   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2153 
2154   PetscFunctionBegin;
2155   PetscCall(MatRealPart(a->A));
2156   PetscCall(MatRealPart(a->B));
2157   PetscFunctionReturn(PETSC_SUCCESS);
2158 }
2159 
2160 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2161 {
2162   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2163 
2164   PetscFunctionBegin;
2165   PetscCall(MatImaginaryPart(a->A));
2166   PetscCall(MatImaginaryPart(a->B));
2167   PetscFunctionReturn(PETSC_SUCCESS);
2168 }
2169 
2170 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2171 {
2172   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2173   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2174   PetscScalar       *va, *vv;
2175   Vec                vB, vA;
2176   const PetscScalar *vb;
2177 
2178   PetscFunctionBegin;
2179   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2180   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2181 
2182   PetscCall(VecGetArrayWrite(vA, &va));
2183   if (idx) {
2184     for (i = 0; i < m; i++) {
2185       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2186     }
2187   }
2188 
2189   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2190   PetscCall(PetscMalloc1(m, &idxb));
2191   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2192 
2193   PetscCall(VecGetArrayWrite(v, &vv));
2194   PetscCall(VecGetArrayRead(vB, &vb));
2195   for (i = 0; i < m; i++) {
2196     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2197       vv[i] = vb[i];
2198       if (idx) idx[i] = a->garray[idxb[i]];
2199     } else {
2200       vv[i] = va[i];
2201       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2202     }
2203   }
2204   PetscCall(VecRestoreArrayWrite(vA, &vv));
2205   PetscCall(VecRestoreArrayWrite(vA, &va));
2206   PetscCall(VecRestoreArrayRead(vB, &vb));
2207   PetscCall(PetscFree(idxb));
2208   PetscCall(VecDestroy(&vA));
2209   PetscCall(VecDestroy(&vB));
2210   PetscFunctionReturn(PETSC_SUCCESS);
2211 }
2212 
2213 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v)
2214 {
2215   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2216   Vec         vB, vA;
2217 
2218   PetscFunctionBegin;
2219   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2220   PetscCall(MatGetRowSumAbs(a->A, vA));
2221   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2222   PetscCall(MatGetRowSumAbs(a->B, vB));
2223   PetscCall(VecAXPY(vA, 1.0, vB));
2224   PetscCall(VecDestroy(&vB));
2225   PetscCall(VecCopy(vA, v));
2226   PetscCall(VecDestroy(&vA));
2227   PetscFunctionReturn(PETSC_SUCCESS);
2228 }
2229 
/* v[r] = entry of smallest absolute value in local row r, where columns with no
   stored entry count as implicit 0.0; idx[r], if requested, gets the global column
   of that entry (ties broken toward the smaller global column). */
static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* compressed B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything is in the diagonal block; write the seq result straight into v */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* this rank owns no columns: rows are entirely implicit zeros */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = 0.0;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so it has an implicit 0.0 and the row min magnitude is at most 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): compares the global column `col` against the local counter `j`
         to detect the first missing off-diagonal column — verify against the
         identical logic in MatGetRowMin_/MatGetRowMax_MPIAIJ */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the diagonal-block column range */
        }
      }
    }

    /* scan stored entries of this B row for a smaller magnitude than the implicit 0.0 candidate */
    for (j = 0; j < ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge diagonal-block and off-diagonal-block minima; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2345 
/* v[r] = minimum entry (by real part) in local row r, where columns with no
   stored entry count as implicit 0.0; idx[r], if requested, gets the global
   column of that entry (ties broken toward the smaller global column). */
static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* compressed B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything is in the diagonal block; write the seq result straight into v */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* this rank owns no columns: report the identity element for "min" */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so it has an implicit 0.0 and the row minimum is 0.0 or lower */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): compares the global column `col` against the local counter `j`
         to detect the first missing off-diagonal column — same logic as
         MatGetRowMinAbs_/MatGetRowMax_MPIAIJ */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the diagonal-block column range */
        }
      }
    }

    /* scan stored entries of this B row for a value below the implicit 0.0 candidate */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge diagonal-block and off-diagonal-block minima; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2461 
/* v[r] = maximum entry (by real part) in local row r, where columns with no
   stored entry count as implicit 0.0; idx[r], if requested, gets the global
   column of that entry (ties broken toward the smaller global column). */
static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* compressed B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything is in the diagonal block; write the seq result straight into v */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* this rank owns no columns: report the identity element for "max" */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): compares the global column `col` against the local counter `j`
         to detect the first missing off-diagonal column — same logic as
         MatGetRowMin_/MatGetRowMinAbs_MPIAIJ */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the diagonal-block column range */
        }
      }
    }

    /* scan stored entries of this B row for a value above the implicit 0.0 candidate */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge diagonal-block and off-diagonal-block maxima; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2577 
2578 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2579 {
2580   Mat *dummy;
2581 
2582   PetscFunctionBegin;
2583   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2584   *newmat = *dummy;
2585   PetscCall(PetscFree(dummy));
2586   PetscFunctionReturn(PETSC_SUCCESS);
2587 }
2588 
2589 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2590 {
2591   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2592 
2593   PetscFunctionBegin;
2594   PetscCall(MatInvertBlockDiagonal(a->A, values));
2595   A->factorerrortype = a->A->factorerrortype;
2596   PetscFunctionReturn(PETSC_SUCCESS);
2597 }
2598 
2599 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2600 {
2601   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2602 
2603   PetscFunctionBegin;
2604   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2605   PetscCall(MatSetRandom(aij->A, rctx));
2606   if (x->assembled) {
2607     PetscCall(MatSetRandom(aij->B, rctx));
2608   } else {
2609     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2610   }
2611   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2612   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2613   PetscFunctionReturn(PETSC_SUCCESS);
2614 }
2615 
2616 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2617 {
2618   PetscFunctionBegin;
2619   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2620   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2621   PetscFunctionReturn(PETSC_SUCCESS);
2622 }
2623 
2624 /*@
2625   MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2626 
2627   Not Collective
2628 
2629   Input Parameter:
2630 . A - the matrix
2631 
2632   Output Parameter:
2633 . nz - the number of nonzeros
2634 
2635   Level: advanced
2636 
2637 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2638 @*/
2639 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2640 {
2641   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2642   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2643   PetscBool   isaij;
2644 
2645   PetscFunctionBegin;
2646   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
2647   PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
2648   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2649   PetscFunctionReturn(PETSC_SUCCESS);
2650 }
2651 
2652 /*@
2653   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2654 
2655   Collective
2656 
2657   Input Parameters:
2658 + A  - the matrix
2659 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)
2660 
2661   Level: advanced
2662 
2663 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2664 @*/
2665 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
2666 {
2667   PetscFunctionBegin;
2668   PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
2669   PetscFunctionReturn(PETSC_SUCCESS);
2670 }
2671 
2672 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
2673 {
2674   PetscBool sc = PETSC_FALSE, flg;
2675 
2676   PetscFunctionBegin;
2677   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2678   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2679   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2680   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2681   PetscOptionsHeadEnd();
2682   PetscFunctionReturn(PETSC_SUCCESS);
2683 }
2684 
/* Y += a*I. Ensures a minimal preallocation (one entry per row) exists before
   delegating to the generic diagonal-shift kernel. */
static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    /* never preallocated: one diagonal entry per row is all the shift needs */
    PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
  } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
    PetscInt nonew = aij->nonew; /* save: re-preallocating resets the new-nonzero-error flag */
    PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2701 
2702 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2703 {
2704   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2705 
2706   PetscFunctionBegin;
2707   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2708   PetscCall(MatMissingDiagonal(a->A, missing, d));
2709   if (d) {
2710     PetscInt rstart;
2711     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2712     *d += rstart;
2713   }
2714   PetscFunctionReturn(PETSC_SUCCESS);
2715 }
2716 
2717 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2718 {
2719   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2720 
2721   PetscFunctionBegin;
2722   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2723   PetscFunctionReturn(PETSC_SUCCESS);
2724 }
2725 
2726 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
2727 {
2728   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2729 
2730   PetscFunctionBegin;
2731   PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2732   PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2733   PetscFunctionReturn(PETSC_SUCCESS);
2734 }
2735 
/* Dispatch table for MATMPIAIJ. Entries are POSITIONAL: each slot corresponds to a
   fixed operation index in struct _MatOps (the numbered comments mark every fifth
   slot); NULL means the operation is not provided by this type. Do not reorder. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIAIJ,
                                       MatGetRowSumAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*155*/ NULL,
                                       MatCopyHashToXAIJ_MPI_Hash};
2893 
2894 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2895 {
2896   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2897 
2898   PetscFunctionBegin;
2899   PetscCall(MatStoreValues(aij->A));
2900   PetscCall(MatStoreValues(aij->B));
2901   PetscFunctionReturn(PETSC_SUCCESS);
2902 }
2903 
2904 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2905 {
2906   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2907 
2908   PetscFunctionBegin;
2909   PetscCall(MatRetrieveValues(aij->A));
2910   PetscCall(MatRetrieveValues(aij->B));
2911   PetscFunctionReturn(PETSC_SUCCESS);
2912 }
2913 
2914 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2915 {
2916   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2917   PetscMPIInt size;
2918 
2919   PetscFunctionBegin;
2920   if (B->hash_active) {
2921     B->ops[0]      = b->cops;
2922     B->hash_active = PETSC_FALSE;
2923   }
2924   PetscCall(PetscLayoutSetUp(B->rmap));
2925   PetscCall(PetscLayoutSetUp(B->cmap));
2926 
2927 #if defined(PETSC_USE_CTABLE)
2928   PetscCall(PetscHMapIDestroy(&b->colmap));
2929 #else
2930   PetscCall(PetscFree(b->colmap));
2931 #endif
2932   PetscCall(PetscFree(b->garray));
2933   PetscCall(VecDestroy(&b->lvec));
2934   PetscCall(VecScatterDestroy(&b->Mvctx));
2935 
2936   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2937 
2938   MatSeqXAIJGetOptions_Private(b->B);
2939   PetscCall(MatDestroy(&b->B));
2940   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2941   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2942   PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
2943   PetscCall(MatSetType(b->B, MATSEQAIJ));
2944   MatSeqXAIJRestoreOptions_Private(b->B);
2945 
2946   MatSeqXAIJGetOptions_Private(b->A);
2947   PetscCall(MatDestroy(&b->A));
2948   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2949   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2950   PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
2951   PetscCall(MatSetType(b->A, MATSEQAIJ));
2952   MatSeqXAIJRestoreOptions_Private(b->A);
2953 
2954   PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
2955   PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
2956   B->preallocated  = PETSC_TRUE;
2957   B->was_assembled = PETSC_FALSE;
2958   B->assembled     = PETSC_FALSE;
2959   PetscFunctionReturn(PETSC_SUCCESS);
2960 }
2961 
/* Reset an already-preallocated MATMPIAIJ matrix so values can be inserted anew,
   reusing the existing nonzero storage of the sequential A and B blocks.
   The matrix is left preallocated but unassembled. */
static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  /* A matrix that was (ever) assembled is disassembled; NOTE(review): the PETSC_TRUE
     flag's exact meaning is defined by MatDisAssemble_MPIAIJ() — confirm there.
     Otherwise only the column map and ghost data need to be dropped by hand. */
  if (B->assembled || B->was_assembled) PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_TRUE));
  else {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDestroy(&b->colmap));
#else
    PetscCall(PetscFree(b->colmap));
#endif
    PetscCall(PetscFree(b->garray));
    PetscCall(VecDestroy(&b->lvec));
  }
  /* the scatter context is rebuilt at the next assembly in either case */
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2989 
2990 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2991 {
2992   Mat         mat;
2993   Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;
2994 
2995   PetscFunctionBegin;
2996   *newmat = NULL;
2997   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2998   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2999   PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
3000   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
3001   a = (Mat_MPIAIJ *)mat->data;
3002 
3003   mat->factortype = matin->factortype;
3004   mat->assembled  = matin->assembled;
3005   mat->insertmode = NOT_SET_VALUES;
3006 
3007   a->size         = oldmat->size;
3008   a->rank         = oldmat->rank;
3009   a->donotstash   = oldmat->donotstash;
3010   a->roworiented  = oldmat->roworiented;
3011   a->rowindices   = NULL;
3012   a->rowvalues    = NULL;
3013   a->getrowactive = PETSC_FALSE;
3014 
3015   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
3016   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
3017   if (matin->hash_active) {
3018     PetscCall(MatSetUp(mat));
3019   } else {
3020     mat->preallocated = matin->preallocated;
3021     if (oldmat->colmap) {
3022 #if defined(PETSC_USE_CTABLE)
3023       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3024 #else
3025       PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
3026       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
3027 #endif
3028     } else a->colmap = NULL;
3029     if (oldmat->garray) {
3030       PetscInt len;
3031       len = oldmat->B->cmap->n;
3032       PetscCall(PetscMalloc1(len + 1, &a->garray));
3033       if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3034     } else a->garray = NULL;
3035 
3036     /* It may happen MatDuplicate is called with a non-assembled matrix
3037       In fact, MatDuplicate only requires the matrix to be preallocated
3038       This may happen inside a DMCreateMatrix_Shell */
3039     if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3040     if (oldmat->Mvctx) {
3041       a->Mvctx = oldmat->Mvctx;
3042       PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx));
3043     }
3044     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3045     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3046   }
3047   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3048   *newmat = mat;
3049   PetscFunctionReturn(PETSC_SUCCESS);
3050 }
3051 
3052 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3053 {
3054   PetscBool isbinary, ishdf5;
3055 
3056   PetscFunctionBegin;
3057   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3058   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3059   /* force binary viewer to load .info file if it has not yet done so */
3060   PetscCall(PetscViewerSetUp(viewer));
3061   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3062   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3063   if (isbinary) {
3064     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3065   } else if (ishdf5) {
3066 #if defined(PETSC_HAVE_HDF5)
3067     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3068 #else
3069     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3070 #endif
3071   } else {
3072     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3073   }
3074   PetscFunctionReturn(PETSC_SUCCESS);
3075 }
3076 
3077 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3078 {
3079   PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
3080   PetscInt    *rowidxs, *colidxs;
3081   PetscScalar *matvals;
3082 
3083   PetscFunctionBegin;
3084   PetscCall(PetscViewerSetUp(viewer));
3085 
3086   /* read in matrix header */
3087   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3088   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3089   M  = header[1];
3090   N  = header[2];
3091   nz = header[3];
3092   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3093   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3094   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");
3095 
3096   /* set block sizes from the viewer's .info file */
3097   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3098   /* set global sizes if not set already */
3099   if (mat->rmap->N < 0) mat->rmap->N = M;
3100   if (mat->cmap->N < 0) mat->cmap->N = N;
3101   PetscCall(PetscLayoutSetUp(mat->rmap));
3102   PetscCall(PetscLayoutSetUp(mat->cmap));
3103 
3104   /* check if the matrix sizes are correct */
3105   PetscCall(MatGetSize(mat, &rows, &cols));
3106   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3107 
3108   /* read in row lengths and build row indices */
3109   PetscCall(MatGetLocalSize(mat, &m, NULL));
3110   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3111   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3112   rowidxs[0] = 0;
3113   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3114   if (nz != PETSC_INT_MAX) {
3115     PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3116     PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3117   }
3118 
3119   /* read in column indices and matrix values */
3120   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3121   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3122   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3123   /* store matrix indices and values */
3124   PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
3125   PetscCall(PetscFree(rowidxs));
3126   PetscCall(PetscFree2(colidxs, matvals));
3127   PetscFunctionReturn(PETSC_SUCCESS);
3128 }
3129 
3130 /* Not scalable because of ISAllGather() unless getting all columns. */
3131 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3132 {
3133   IS          iscol_local;
3134   PetscBool   isstride;
3135   PetscMPIInt lisstride = 0, gisstride;
3136 
3137   PetscFunctionBegin;
3138   /* check if we are grabbing all columns*/
3139   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3140 
3141   if (isstride) {
3142     PetscInt start, len, mstart, mlen;
3143     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3144     PetscCall(ISGetLocalSize(iscol, &len));
3145     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3146     if (mstart == start && mlen - mstart == len) lisstride = 1;
3147   }
3148 
3149   PetscCallMPI(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3150   if (gisstride) {
3151     PetscInt N;
3152     PetscCall(MatGetSize(mat, NULL, &N));
3153     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3154     PetscCall(ISSetIdentity(iscol_local));
3155     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3156   } else {
3157     PetscInt cbs;
3158     PetscCall(ISGetBlockSize(iscol, &cbs));
3159     PetscCall(ISAllGather(iscol, &iscol_local));
3160     PetscCall(ISSetBlockSize(iscol_local, cbs));
3161   }
3162 
3163   *isseq = iscol_local;
3164   PetscFunctionReturn(PETSC_SUCCESS);
3165 }
3166 
3167 /*
3168  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3169  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3170 
3171  Input Parameters:
3172 +   mat - matrix
3173 .   isrow - parallel row index set; its local indices are a subset of local columns of `mat`,
3174            i.e., mat->rstart <= isrow[i] < mat->rend
3175 -   iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
3176            i.e., mat->cstart <= iscol[i] < mat->cend
3177 
3178  Output Parameters:
3179 +   isrow_d - sequential row index set for retrieving mat->A
3180 .   iscol_d - sequential  column index set for retrieving mat->A
3181 .   iscol_o - sequential column index set for retrieving mat->B
3182 -   garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
3183  */
static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, PetscInt *garray[])
{
  Vec             x, cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray, *cmaparray;
  PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
  Mat             B    = a->B;
  Vec             lvec = a->lvec, lcmap;
  PetscInt        i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm        comm;
  VecScatter      Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0));
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices */
  /* exclusive prefix sum: isstart = global offset of this rank's first selected column */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  /* mark each selected column in x (by its global index) and in cmap (by its
     position within the gathered iscol); unselected entries stay -1 */
  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
    idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d */
  /* idx ownership passes to the IS (PETSC_OWN_POINTER); i is reused for the block size */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i));
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; /* shift to local row numbering */
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  /* entries that stayed -1 were not selected by any rank's iscol; entries > -1
     mark ghost columns of B that belong to the submatrix */
  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  for (i = 0; i < Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  *garray = cmap1; /* caller takes ownership of cmap1 */

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3280 
/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat         M = NULL;
  MPI_Comm    comm;
  IS          iscol_d, isrow_d, iscol_o;
  Mat         Asub = NULL, Bsub = NULL;
  PetscInt    n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    /* these index sets were composed onto *submat by the MAT_INITIAL_MATRIX branch below */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscInt *garray;
    PetscInt  BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));

    /* Create submatrix M */
    /* M takes over Asub; Bsub is consumed (destroyed) by MatCreateMPIAIJWithSeqAIJ() */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ *)M->data;

    PetscCall(ISGetLocalSize(iscol_o, &BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt        i, j, *idx_new, *subgarray = asub->garray;
      PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));

      /* walk both sorted ghost lists; keep only iscol_o entries whose global
         column survived into the condensed off-diagonal block */
      PetscCall(PetscMalloc1(n, &idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o, &idx));
      for (i = 0; i < n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o, &idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    /* PetscObjectCompose() takes its own reference, so destroying our reference
       here leaves the index sets alive, attached to M, for the reuse branch */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3373 
/* Top-level MatCreateSubMatrix() implementation for MATMPIAIJ: picks the cheapest
   extraction path depending on whether isrow/iscol match the matrix's own
   row/column ownership (collectively decided), falling back to the general,
   non-scalable algorithm that gathers iscol onto every rank. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* which objects were composed on *newmat tells us which path built it originally */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      /* i/j hold the min/max selected row; both must lie in this rank's row range */
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol, &i, &j));
      PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* the choice of path must be unanimous across the communicator */
    PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
    PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
        PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol, &i));
        PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);

        PetscCall(ISSorted(iscol_local, &sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
        /* unsorted iscol_local: fall through to the general algorithm below */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    /* iscol_local may already have been built by the sameRowDist branch above */
    if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
  }

  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* attach the gathered IS to the result for later MAT_REUSE_MATRIX calls */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3473 
3474 /*@C
3475   MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3476   and "off-diagonal" part of the matrix in CSR format.
3477 
3478   Collective
3479 
3480   Input Parameters:
3481 + comm   - MPI communicator
3482 . A      - "diagonal" portion of matrix
3483 . B      - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3484 - garray - global index of `B` columns
3485 
3486   Output Parameter:
3487 . mat - the matrix, with input `A` as its local diagonal matrix
3488 
3489   Level: advanced
3490 
3491   Notes:
3492   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3493 
3494   `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore.
3495 
3496 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3497 @*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  /* A and B must have the same number of local rows */
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat */
  PetscCallMPI(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat */
  maij->A = A;

  /* translate B's column indices from local to global numbering, in place */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat */
  /* Bnew aliases B's i/j arrays (now holding global column indices) and its value array */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* transfer ownership of the shared i/j/a arrays from B to Bnew: clear B's free
     flags so MatDestroy(&B) does not release them, then let Bnew free them later */
  b->free_a  = PETSC_FALSE;
  b->free_ij = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->free_a  = PETSC_TRUE;
  bnew->free_ij = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3566 
3567 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3568 
/* Extract the parallel submatrix mat[isrow, iscol] in the case where the submatrix keeps the
   same row distribution as mat.  A sequential submatrix Msub is first built on each process by
   MatCreateSubMatrices_MPIAIJ_SingleIS_Local(), then its rows are inserted into the parallel
   result M.  On a MAT_INITIAL_MATRIX call the intermediate objects (Msub; iscol_sub, the column
   index set restricted to columns present on this process; and iscmap, the map from Msub column
   numbers to submatrix column numbers) are composed on *newmat so a subsequent MAT_REUSE_MATRIX
   call can reuse them.  iscol_local holds the gathered global column indices of iscol on this
   process; the non-allcolumns path below requires it to be sorted (duplicates allowed). */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
{
  PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
  PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             M, Msub, B = a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray, *colsub, Ncols;
  PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  IS              iscol_sub, iscmap;
  const PetscInt *is_idx, *cmap;
  PetscBool       allcolumns = PETSC_FALSE;
  MPI_Comm        comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve the objects stashed on *newmat by a previous MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
    PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub, &count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
    PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
    PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");

    /* Refill the existing sequential submatrix with the current values of mat */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));

  } else { /* call == MAT_INITIAL_MATRIX */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol, &n));
    PetscCall(ISGetSize(iscol, &Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local, &flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    /* the special case applies only if every process requests all columns */
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      /* the column map is then the identity 0,1,...,n-1 */
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx, *cmap1, k;
      PetscCall(PetscMalloc1(Ncols, &idx));
      PetscCall(PetscMalloc1(Ncols, &cmap1));
      PetscCall(ISGetIndices(iscol_local, &is_idx));
      count = 0;
      k     = 0;
      /* Keep a requested global column j only if it is locally owned (diagonal block) or
         appears in garray (an off-diagonal column of this process); garray is walked with
         the moving index k, relying on both sequences being sorted ascending */
      for (i = 0; i < Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn - 1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local, &is_idx));

      /* idx and cmap1 ownership passes to the index sets (PETSC_OWN_POINTER) */
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
      PetscCall(ISGetBlockSize(iscol, &cbs));
      PetscCall(ISSetBlockSize(iscol_sub, cbs));

      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub, &count));
  aij = (Mat_SeqAIJ *)Msub->data;
  ii  = aij->i;
  PetscCall(ISGetIndices(iscmap, &cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub, &m, NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank, size;
    PetscInt    csize;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol, &csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        /* default even split of Ncols columns over size processes */
        nlocal = Ncols / size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* inclusive prefix sum of local column counts gives this process's column range [rstart, rend) */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);

    /* next, compute all the lengths */
    jj = aij->j;
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single dlens allocation; only dlens is freed */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      /* cmap[] translates a Msub column number to the submatrix global column number */
      for (j = 0; j < jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow, &bs));
    PetscCall(ISGetBlockSize(iscol, &cbs));

    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M, &i, NULL));
    PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(count, &colsub));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));

  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; /* translate to submatrix column numbers */
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
    jj += nz;
    aa += nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
  PetscCall(ISRestoreIndices(iscmap, &cmap));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    /* composing keeps a reference; the local handles can then be dropped */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3778 
3779 /*
3780     Not great since it makes two copies of the submatrix, first an SeqAIJ
3781   in local and then by concatenating the local matrices the end result.
3782   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3783 
3784   This requires a sequential iscol with all indices.
3785 */
/* Extract the parallel submatrix mat[isrow, iscol] by first forming the requested rows/columns
   as a sequential matrix Mreuse on each process (MatCreateSubMatrices_MPIAIJ_SingleIS_Local())
   and then inserting the local pieces into the distributed result -- hence "nonscalable" (see
   the comment above this routine in the file).  iscol must be a sequential IS with all column
   indices.  csize is the local column size of the result, or PETSC_DECIDE.  On MAT_INITIAL_MATRIX
   the sequential matrix is composed on the result as "SubMatrix" so that MAT_REUSE_MATRIX calls
   can refill it in place. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* the special case applies only if every process requests all columns */
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    /* Refill the sequential submatrix stashed on *newmat by a previous MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)Mreuse->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* default even split of the n columns over the size processes */
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* inclusive prefix sum of local column counts gives this process's column range [rstart, rend) */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single dlens allocation; only dlens is freed */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)Mreuse->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row   = rstart + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj; /* column indices of row i in Mreuse */
    jj    = PetscSafePointerPlusOffset(jj, nz);
    vwork = aa; /* values of row i in Mreuse */
    aa    = PetscSafePointerPlusOffset(aa, nz);
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    /* composing keeps a reference; the local handle can then be dropped */
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3912 
3913 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3914 {
3915   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3916   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart;
3917   const PetscInt *JJ;
3918   PetscBool       nooffprocentries;
3919   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3920 
3921   PetscFunctionBegin;
3922   PetscCall(PetscLayoutSetUp(B->rmap));
3923   PetscCall(PetscLayoutSetUp(B->cmap));
3924   m       = B->rmap->n;
3925   cstart  = B->cmap->rstart;
3926   cend    = B->cmap->rend;
3927   rstart  = B->rmap->rstart;
3928   irstart = Ii[0];
3929 
3930   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3931 
3932   if (PetscDefined(USE_DEBUG)) {
3933     for (i = 0; i < m; i++) {
3934       nnz = Ii[i + 1] - Ii[i];
3935       JJ  = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
3936       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3937       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3938       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3939     }
3940   }
3941 
3942   for (i = 0; i < m; i++) {
3943     nnz     = Ii[i + 1] - Ii[i];
3944     JJ      = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
3945     nnz_max = PetscMax(nnz_max, nnz);
3946     d       = 0;
3947     for (j = 0; j < nnz; j++) {
3948       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3949     }
3950     d_nnz[i] = d;
3951     o_nnz[i] = nnz - d;
3952   }
3953   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3954   PetscCall(PetscFree2(d_nnz, o_nnz));
3955 
3956   for (i = 0; i < m; i++) {
3957     ii = i + rstart;
3958     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES));
3959   }
3960   nooffprocentries    = B->nooffprocentries;
3961   B->nooffprocentries = PETSC_TRUE;
3962   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3963   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3964   B->nooffprocentries = nooffprocentries;
3965 
3966   /* count number of entries below block diagonal */
3967   PetscCall(PetscFree(Aij->ld));
3968   PetscCall(PetscCalloc1(m, &ld));
3969   Aij->ld = ld;
3970   for (i = 0; i < m; i++) {
3971     nnz = Ii[i + 1] - Ii[i];
3972     j   = 0;
3973     while (j < nnz && J[j] < cstart) j++;
3974     ld[i] = j;
3975     if (J) J += nnz;
3976   }
3977 
3978   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3979   PetscFunctionReturn(PETSC_SUCCESS);
3980 }
3981 
3982 /*@
3983   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3984   (the default parallel PETSc format).
3985 
3986   Collective
3987 
3988   Input Parameters:
3989 + B - the matrix
3990 . i - the indices into `j` for the start of each local row (indices start with zero)
3991 . j - the column indices for each local row (indices start with zero)
3992 - v - optional values in the matrix
3993 
3994   Level: developer
3995 
3996   Notes:
3997   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
3998   thus you CANNOT change the matrix entries by changing the values of `v` after you have
3999   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4000 
4001   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
4002 
4003   A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.
4004 
4005   You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.
4006 
4007   If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
4008   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
4009 
4010   The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering, i.e., for the following matrix, the input data expected is
4012   as shown
4013 .vb
4014         1 0 0
4015         2 0 3     P0
4016        -------
4017         4 5 6     P1
4018 
4019      Process0 [P0] rows_owned=[0,1]
4020         i =  {0,1,3}  [size = nrow+1  = 2+1]
4021         j =  {0,0,2}  [size = 3]
4022         v =  {1,2,3}  [size = 3]
4023 
4024      Process1 [P1] rows_owned=[2]
4025         i =  {0,3}    [size = nrow+1  = 1+1]
4026         j =  {0,1,2}  [size = 3]
4027         v =  {4,5,6}  [size = 3]
4028 .ve
4029 
4030 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
4031           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4032 @*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* Dispatch to the "MatMPIAIJSetPreallocationCSR_C" method composed on B (e.g.
     MatMPIAIJSetPreallocationCSR_MPIAIJ()); PetscTryMethod() does nothing if B's
     type does not provide the method */
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4039 
4040 /*@
4041   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
4042   (the default parallel PETSc format).  For good matrix assembly performance
4043   the user should preallocate the matrix storage by setting the parameters
4044   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4045 
4046   Collective
4047 
4048   Input Parameters:
4049 + B     - the matrix
4050 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4051            (same value is used for all local rows)
4052 . d_nnz - array containing the number of nonzeros in the various rows of the
4053            DIAGONAL portion of the local submatrix (possibly different for each row)
4054            or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4055            The size of this array is equal to the number of local rows, i.e 'm'.
4056            For matrices that will be factored, you must leave room for (and set)
4057            the diagonal entry even if it is zero.
4058 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4059            submatrix (same value is used for all local rows).
4060 - o_nnz - array containing the number of nonzeros in the various rows of the
4061            OFF-DIAGONAL portion of the local submatrix (possibly different for
4062            each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4063            structure. The size of this array is equal to the number
4064            of local rows, i.e 'm'.
4065 
4066   Example Usage:
4067   Consider the following 8x8 matrix with 34 non-zero values, that is
4068   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4069   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4070   as follows
4071 
4072 .vb
4073             1  2  0  |  0  3  0  |  0  4
4074     Proc0   0  5  6  |  7  0  0  |  8  0
4075             9  0 10  | 11  0  0  | 12  0
4076     -------------------------------------
4077            13  0 14  | 15 16 17  |  0  0
4078     Proc1   0 18  0  | 19 20 21  |  0  0
4079             0  0  0  | 22 23  0  | 24  0
4080     -------------------------------------
4081     Proc2  25 26 27  |  0  0 28  | 29  0
4082            30  0  0  | 31 32 33  |  0 34
4083 .ve
4084 
4085   This can be represented as a collection of submatrices as
4086 .vb
4087       A B C
4088       D E F
4089       G H I
4090 .ve
4091 
4092   Where the submatrices A,B,C are owned by proc0, D,E,F are
4093   owned by proc1, G,H,I are owned by proc2.
4094 
4095   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4096   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4097   The 'M','N' parameters are 8,8, and have the same values on all procs.
4098 
4099   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4100   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4101   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4102   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4103   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4104   matrix, and [DF] as another `MATSEQAIJ` matrix.
4105 
4106   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4107   allocated for every row of the local DIAGONAL submatrix, and `o_nz`
4108   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4109   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
4110   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4111   In this case, the values of `d_nz`, `o_nz` are
4112 .vb
4113      proc0  dnz = 2, o_nz = 2
4114      proc1  dnz = 3, o_nz = 2
4115      proc2  dnz = 1, o_nz = 4
4116 .ve
4117   We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
4118   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2, i.e., we are using 12+15+10=37 storage locations to store
4120   34 values.
4121 
4122   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4123   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4124   In the above case the values for `d_nnz`, `o_nnz` are
4125 .vb
4126      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4127      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4128      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4129 .ve
4130   Here the space allocated is sum of all the above values i.e 34, and
4131   hence pre-allocation is perfect.
4132 
4133   Level: intermediate
4134 
4135   Notes:
4136   If the *_nnz parameter is given then the *_nz parameter is ignored
4137 
4138   The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4139   storage.  The stored row and column indices begin with zero.
4140   See [Sparse Matrices](sec_matsparse) for details.
4141 
4142   The parallel matrix is partitioned such that the first m0 rows belong to
4143   process 0, the next m1 rows belong to process 1, the next m2 rows belong
4144   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4145 
4146   The DIAGONAL portion of the local submatrix of a processor can be defined
4147   as the submatrix which is obtained by extraction the part corresponding to
4148   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4149   first row that belongs to the processor, r2 is the last row belonging to
4150   the this processor, and c1-c2 is range of indices of the local part of a
4151   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4152   common case of a square matrix, the row and column ranges are the same and
4153   the DIAGONAL part is also square. The remaining portion of the local
4154   submatrix (mxN) constitute the OFF-DIAGONAL portion.
4155 
4156   If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4157 
4158   You can call `MatGetInfo()` to get information on how effective the preallocation was;
4159   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4160   You can also run with the option `-info` and look for messages with the string
4161   malloc in them to see if additional memory allocation was needed.
4162 
4163 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4164           `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4165 @*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1); /* B must be a valid Mat ... */
  PetscValidType(B, 1);                        /* ... whose type has already been set */
  /* Dispatch to the type-specific implementation; a no-op if B's type does not provide it */
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4174 
4175 /*@
4176   MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard
4177   CSR format for the local rows.
4178 
4179   Collective
4180 
4181   Input Parameters:
4182 + comm - MPI communicator
4183 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
4184 . n    - This value should be the same as the local size used in creating the
         x vector for the matrix-vector product $y = Ax$ (or `PETSC_DECIDE` to have it
         calculated if `N` is given). For square matrices `n` is almost always `m`.
4187 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
4188 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
4189 . i    - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4190 . j    - global column indices
4191 - a    - optional matrix values
4192 
4193   Output Parameter:
4194 . mat - the matrix
4195 
4196   Level: intermediate
4197 
4198   Notes:
4199   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4200   thus you CANNOT change the matrix entries by changing the values of `a[]` after you have
4201   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4202 
4203   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
4204 
4205   Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()`
4206 
4207   If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use
4208   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
4209 
4210   The format which is used for the sparse matrix input, is equivalent to a
4211   row-major ordering, i.e., for the following matrix, the input data expected is
4212   as shown
4213 .vb
4214         1 0 0
4215         2 0 3     P0
4216        -------
4217         4 5 6     P1
4218 
4219      Process0 [P0] rows_owned=[0,1]
4220         i =  {0,1,3}  [size = nrow+1  = 2+1]
4221         j =  {0,0,2}  [size = 3]
4222         v =  {1,2,3}  [size = 3]
4223 
4224      Process1 [P1] rows_owned=[2]
4225         i =  {0,3}    [size = nrow+1  = 1+1]
4226         j =  {0,1,2}  [size = 3]
4227         v =  {4,5,6}  [size = 3]
4228 .ve
4229 
4230 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4231           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4232 @*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  /* i must be a local CSR row-offset array, hence i[0] == 0 (unlike the internal
     MatMPIAIJSetPreallocationCSR_MPIAIJ(), which tolerates a global offset in Ii[0]) */
  PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  /* copies i, j, a into the matrix; the caller's arrays are not kept */
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4245 
4246 /*@
4247   MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard
4248   CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed
4249   from `MatCreateMPIAIJWithArrays()`
4250 
4251   Deprecated: Use `MatUpdateMPIAIJWithArray()`
4252 
4253   Collective
4254 
4255   Input Parameters:
4256 + mat - the matrix
4257 . m   - number of local rows (Cannot be `PETSC_DECIDE`)
4258 . n   - This value should be the same as the local size used in creating the
4259        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4260        calculated if N is given) For square matrices n is almost always m.
4261 . M   - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4262 . N   - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4263 . Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4264 . J   - column indices
4265 - v   - matrix values
4266 
4267   Level: deprecated
4268 
4269 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4270           `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4271 @*/
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        nnz, i;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
  PetscScalar    *ad, *ao;
  PetscInt        ldi, Iii, md;
  const PetscInt *Adi = Ad->i;    /* row offsets of the diagonal (local-column) block */
  PetscInt       *ld  = Aij->ld;  /* per row: # off-diagonal entries with columns left of the diagonal block */

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
  PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));

  for (i = 0; i < m; i++) {
    /* in debug builds verify that each row's column indices are sorted and unique,
       which the three-way split of v below relies on */
    if (PetscDefined(USE_DEBUG)) {
      for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) {
        PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i);
        PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i);
      }
    }
    nnz = Ii[i + 1] - Ii[i];   /* total entries of row i in v */
    Iii = Ii[i];               /* offset of row i inside v */
    ldi = ld[i];               /* off-diagonal entries preceding the diagonal block */
    md  = Adi[i + 1] - Adi[i]; /* entries in the diagonal block */
    /* row i of v is laid out as [off-diag left | diag block | off-diag right];
       ad and ao advance through the diagonal and off-diagonal value arrays */
    PetscCall(PetscArraycpy(ao, v + Iii, ldi));
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
    ad += md;
    ao += nnz - md;
  }
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE; /* only local values changed, so assembly need not communicate a stash */
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(PETSC_SUCCESS);
}
4321 
4322 /*@
  MatUpdateMPIAIJWithArray - updates a `MATMPIAIJ` matrix using an array that contains the nonzero values
4324 
4325   Collective
4326 
4327   Input Parameters:
4328 + mat - the matrix
4329 - v   - matrix values, stored by row
4330 
4331   Level: intermediate
4332 
4333   Notes:
4334   The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4335 
4336   The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly
4337 
4338 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4339           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4340 @*/
4341 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4342 {
4343   PetscInt        nnz, i, m;
4344   PetscBool       nooffprocentries;
4345   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4346   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4347   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4348   PetscScalar    *ad, *ao;
4349   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4350   PetscInt        ldi, Iii, md;
4351   PetscInt       *ld = Aij->ld;
4352 
4353   PetscFunctionBegin;
4354   m = mat->rmap->n;
4355 
4356   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4357   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4358   Iii = 0;
4359   for (i = 0; i < m; i++) {
4360     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4361     ldi = ld[i];
4362     md  = Adi[i + 1] - Adi[i];
4363     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4364     ad += md;
4365     if (ao) {
4366       PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4367       PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4368       ao += nnz - md;
4369     }
4370     Iii += nnz;
4371   }
4372   nooffprocentries      = mat->nooffprocentries;
4373   mat->nooffprocentries = PETSC_TRUE;
4374   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4375   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4376   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4377   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4378   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4379   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4380   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4381   mat->nooffprocentries = nooffprocentries;
4382   PetscFunctionReturn(PETSC_SUCCESS);
4383 }
4384 
4385 /*@
4386   MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4387   (the default parallel PETSc format).  For good matrix assembly performance
4388   the user should preallocate the matrix storage by setting the parameters
4389   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4390 
4391   Collective
4392 
4393   Input Parameters:
4394 + comm  - MPI communicator
4395 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4396           This value should be the same as the local size used in creating the
4397           y vector for the matrix-vector product y = Ax.
4398 . n     - This value should be the same as the local size used in creating the
4399           x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4400           calculated if N is given) For square matrices n is almost always m.
4401 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4402 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4403 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4404           (same value is used for all local rows)
4405 . d_nnz - array containing the number of nonzeros in the various rows of the
4406           DIAGONAL portion of the local submatrix (possibly different for each row)
4407           or `NULL`, if `d_nz` is used to specify the nonzero structure.
4408           The size of this array is equal to the number of local rows, i.e 'm'.
4409 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4410           submatrix (same value is used for all local rows).
4411 - o_nnz - array containing the number of nonzeros in the various rows of the
4412           OFF-DIAGONAL portion of the local submatrix (possibly different for
4413           each row) or `NULL`, if `o_nz` is used to specify the nonzero
4414           structure. The size of this array is equal to the number
4415           of local rows, i.e 'm'.
4416 
4417   Output Parameter:
4418 . A - the matrix
4419 
4420   Options Database Keys:
4421 + -mat_no_inode                     - Do not use inodes
4422 . -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
4423 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4424                                       See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter`
4425                                       to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4426 
4427   Level: intermediate
4428 
4429   Notes:
4430   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4431   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4432   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4433 
4434   If the *_nnz parameter is given then the *_nz parameter is ignored
4435 
4436   The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4437   processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4438   storage requirements for this matrix.
4439 
4440   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one
4441   processor than it must be used on all processors that share the object for
4442   that argument.
4443 
4444   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
4445   `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
4446 
4447   The user MUST specify either the local or global matrix dimensions
4448   (possibly both).
4449 
4450   The parallel matrix is partitioned across processors such that the
4451   first `m0` rows belong to process 0, the next `m1` rows belong to
4452   process 1, the next `m2` rows belong to process 2, etc., where
4453   `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores
4454   values corresponding to [m x N] submatrix.
4455 
4456   The columns are logically partitioned with the n0 columns belonging
4457   to 0th partition, the next n1 columns belonging to the next
4458   partition etc.. where n0,n1,n2... are the input parameter 'n'.
4459 
4460   The DIAGONAL portion of the local submatrix on any given processor
4461   is the submatrix corresponding to the rows and columns m,n
4462   corresponding to the given processor. i.e diagonal matrix on
4463   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4464   etc. The remaining portion of the local submatrix [m x (N-n)]
4465   constitute the OFF-DIAGONAL portion. The example below better
4466   illustrates this concept. The two matrices, the DIAGONAL portion and
4467   the OFF-DIAGONAL portion are each stored as `MATSEQAIJ` matrices.
4468 
4469   For a square global matrix we define each processor's diagonal portion
4470   to be its local rows and the corresponding columns (a square submatrix);
4471   each processor's off-diagonal portion encompasses the remainder of the
4472   local matrix (a rectangular submatrix).
4473 
4474   If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
4475 
4476   When calling this routine with a single process communicator, a matrix of
4477   type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4478   type of communicator, use the construction mechanism
4479 .vb
4480   MatCreate(..., &A);
4481   MatSetType(A, MATMPIAIJ);
4482   MatSetSizes(A, m, n, M, N);
4483   MatMPIAIJSetPreallocation(A, ...);
4484 .ve
4485 
4486   By default, this format uses inodes (identical nodes) when possible.
4487   We search for consecutive rows with the same nonzero structure, thereby
4488   reusing matrix information to achieve increased efficiency.
4489 
4490   Example Usage:
4491   Consider the following 8x8 matrix with 34 non-zero values, that is
4492   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4493   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4494   as follows
4495 
4496 .vb
4497             1  2  0  |  0  3  0  |  0  4
4498     Proc0   0  5  6  |  7  0  0  |  8  0
4499             9  0 10  | 11  0  0  | 12  0
4500     -------------------------------------
4501            13  0 14  | 15 16 17  |  0  0
4502     Proc1   0 18  0  | 19 20 21  |  0  0
4503             0  0  0  | 22 23  0  | 24  0
4504     -------------------------------------
4505     Proc2  25 26 27  |  0  0 28  | 29  0
4506            30  0  0  | 31 32 33  |  0 34
4507 .ve
4508 
4509   This can be represented as a collection of submatrices as
4510 
4511 .vb
4512       A B C
4513       D E F
4514       G H I
4515 .ve
4516 
4517   Where the submatrices A,B,C are owned by proc0, D,E,F are
4518   owned by proc1, G,H,I are owned by proc2.
4519 
4520   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4521   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4522   The 'M','N' parameters are 8,8, and have the same values on all procs.
4523 
4524   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4525   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4526   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4527   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4528   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4529   matrix, and [DF] as another SeqAIJ matrix.
4530 
4531   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4532   allocated for every row of the local DIAGONAL submatrix, and `o_nz`
4533   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4534   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
4535   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4536   In this case, the values of `d_nz`,`o_nz` are
.vb
     proc0  d_nz = 2, o_nz = 2
     proc1  d_nz = 3, o_nz = 2
     proc2  d_nz = 1, o_nz = 4
.ve
  We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2, i.e. we are using 12+15+10=37 storage locations to store
  34 values.
4546 
4547   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4548   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4549   In the above case the values for d_nnz,o_nnz are
4550 .vb
4551      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4552      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4553      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4554 .ve
4555   Here the space allocated is sum of all the above values i.e 34, and
4556   hence pre-allocation is perfect.
4557 
4558 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4559           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`,
4560           `MatGetOwnershipRangesColumn()`, `PetscLayout`
4561 @*/
4562 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4563 {
4564   PetscMPIInt size;
4565 
4566   PetscFunctionBegin;
4567   PetscCall(MatCreate(comm, A));
4568   PetscCall(MatSetSizes(*A, m, n, M, N));
4569   PetscCallMPI(MPI_Comm_size(comm, &size));
4570   if (size > 1) {
4571     PetscCall(MatSetType(*A, MATMPIAIJ));
4572     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4573   } else {
4574     PetscCall(MatSetType(*A, MATSEQAIJ));
4575     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4576   }
4577   PetscFunctionReturn(PETSC_SUCCESS);
4578 }
4579 
4580 /*MC
4581     MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix
4582 
4583     Synopsis:
4584     MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4585 
4586     Not Collective
4587 
4588     Input Parameter:
4589 .   A - the `MATMPIAIJ` matrix
4590 
4591     Output Parameters:
4592 +   Ad - the diagonal portion of the matrix
4593 .   Ao - the off-diagonal portion of the matrix
4594 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4595 -   ierr - error code
4596 
4597      Level: advanced
4598 
4599     Note:
4600     Use  `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4601 
4602 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
4603 M*/
4604 
4605 /*MC
4606     MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4607 
4608     Synopsis:
4609     MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4610 
4611     Not Collective
4612 
4613     Input Parameters:
4614 +   A - the `MATMPIAIJ` matrix
4615 .   Ad - the diagonal portion of the matrix
4616 .   Ao - the off-diagonal portion of the matrix
4617 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4618 -   ierr - error code
4619 
4620      Level: advanced
4621 
4622 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
4623 M*/
4624 
4625 /*@C
4626   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4627 
4628   Not Collective
4629 
4630   Input Parameter:
4631 . A - The `MATMPIAIJ` matrix
4632 
4633   Output Parameters:
4634 + Ad     - The local diagonal block as a `MATSEQAIJ` matrix
4635 . Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
4636 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4637 
4638   Level: intermediate
4639 
4640   Note:
4641   The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns in `Ao` are in [0, Nco), where Nco is
4643   the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
4644   local column numbers to global column numbers in the original matrix.
4645 
4646   Fortran Notes:
4647   `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()`
4648 
4649 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
4650 @*/
4651 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4652 {
4653   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4654   PetscBool   flg;
4655 
4656   PetscFunctionBegin;
4657   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4658   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4659   if (Ad) *Ad = a->A;
4660   if (Ao) *Ao = a->B;
4661   if (colmap) *colmap = a->garray;
4662   PetscFunctionReturn(PETSC_SUCCESS);
4663 }
4664 
/* Concatenate the sequential matrices owned by each rank of comm into one parallel AIJ matrix,
   stacking them by rows in rank order. scall selects whether *outmat is created (symbolic+numeric)
   or only refilled (numeric). */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  PetscInt     m, N, i, rstart, nnz, Ii;
  PetscInt    *indx;
  PetscScalar *values;
  MatType      rootType;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz, *onz, sum, bs, cbs;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    /* exclusive prefix sum of local row counts gives this rank's first global row */
    PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
    rstart -= m;

    /* count per-row nonzeros split into diagonal/off-diagonal parts for preallocation */
    MatPreallocateBegin(comm, m, n, dnz, onz);
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
      PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
    }

    PetscCall(MatCreate(comm, outmat));
    PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
    PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
    PetscCall(MatGetRootType_Private(inmat, &rootType));
    PetscCall(MatSetType(*outmat, rootType));
    /* both preallocation calls are issued; only the one matching the actual type takes effect */
    PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
    MatPreallocateEnd(dnz, onz);
    /* each rank inserts only its own rows, so assembly needs no communication */
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase */
  PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
    Ii = i + rstart;
    PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
  }
  PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4716 
/* Container destructor for the Mat_Merge_SeqsToMPI context attached by
   MatCreateMPIAIJSumSeqAIJSymbolic(); frees all buffers it owns. */
static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void **data)
{
  Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)*data;

  PetscFunctionBegin;
  /* nothing to do if the merge context was never created */
  if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscFree(merge->id_r));
  PetscCall(PetscFree(merge->len_s));
  PetscCall(PetscFree(merge->len_r));
  PetscCall(PetscFree(merge->bi));
  PetscCall(PetscFree(merge->bj));
  /* buf_ri/buf_rj appear to share one contiguous allocation anchored at index 0
     (the PetscPostIrecv* pattern), so only [0] and the pointer array are freed */
  PetscCall(PetscFree(merge->buf_ri[0]));
  PetscCall(PetscFree(merge->buf_ri));
  PetscCall(PetscFree(merge->buf_rj[0]));
  PetscCall(PetscFree(merge->buf_rj));
  PetscCall(PetscFree(merge->coi));
  PetscCall(PetscFree(merge->coj));
  PetscCall(PetscFree(merge->owners_co));
  PetscCall(PetscLayoutDestroy(&merge->rowmap));
  PetscCall(PetscFree(merge));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4739 
4740 #include <../src/mat/utils/freespace.h>
4741 #include <petscbt.h>
4742 
/* Numeric phase of summing per-rank sequential AIJ matrices into mpimat: exchanges the
   value arrays with nonblocking MPI and merges local plus received rows. Requires that
   MatCreateMPIAIJSumSeqAIJSymbolic() previously attached a "MatMergeSeqsToMPI" container. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
{
  MPI_Comm             comm;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, taga, *len_s;
  PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m;
  PetscMPIInt          proc, k;
  PetscInt           **buf_ri, **buf_rj;
  PetscInt             anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
  PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
  MPI_Request         *s_waits, *r_waits;
  MPI_Status          *status;
  const MatScalar     *aa, *a_a;
  MatScalar          **abuf_r, *ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));

  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* retrieve the merge context built by the symbolic phase */
  PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container, (void **)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
  aa = a_a;

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  PetscCall(PetscMalloc1(size, &status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
  PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));

  PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    /* rows destined for [proc] are contiguous in seqmat starting at that rank's first owned row */
    i = owners[proc];
    PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  PetscCall(PetscMalloc1(N, &ba_i));
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  /* set up a cursor (row pointer + i-structure pointer) into each received message */
  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i = 0; i < m; i++) {
    arow = owners[rank] + i;
    bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
    bnzi = bi[i + 1] - bi[i];
    PetscCall(PetscArrayzero(ba_i, bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow + 1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow]; /* aa is re-aimed at this row; a_a keeps the array base for the final restore */
    nextaj = 0;
    /* both column lists are sorted, so a single linear merge locates each contribution */
    for (j = 0; nextaj < anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k] + 1) - *nextai[k];
        aj     = buf_rj[k] + *nextai[k];
        aa     = abuf_r[k] + *nextai[k];
        nextaj = 0;
        for (j = 0; nextaj < anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++;
        nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
  PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));

  /* abuf_r[0] anchors the contiguous receive buffer allocated by PetscPostIrecvScalar */
  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4861 
4862 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4863 {
4864   Mat                  B_mpi;
4865   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4866   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4867   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4868   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4869   PetscInt             len, *dnz, *onz, bs, cbs;
4870   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4871   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4872   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4873   MPI_Status          *status;
4874   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4875   PetscBT              lnkbt;
4876   Mat_Merge_SeqsToMPI *merge;
4877   PetscContainer       container;
4878 
4879   PetscFunctionBegin;
4880   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4881 
4882   /* make sure it is a PETSc comm */
4883   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4884   PetscCallMPI(MPI_Comm_size(comm, &size));
4885   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4886 
4887   PetscCall(PetscNew(&merge));
4888   PetscCall(PetscMalloc1(size, &status));
4889 
4890   /* determine row ownership */
4891   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4892   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4893   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4894   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4895   PetscCall(PetscLayoutSetUp(merge->rowmap));
4896   PetscCall(PetscMalloc1(size, &len_si));
4897   PetscCall(PetscMalloc1(size, &merge->len_s));
4898 
4899   m      = merge->rowmap->n;
4900   owners = merge->rowmap->range;
4901 
4902   /* determine the number of messages to send, their lengths */
4903   len_s = merge->len_s;
4904 
4905   len          = 0; /* length of buf_si[] */
4906   merge->nsend = 0;
4907   for (PetscMPIInt proc = 0; proc < size; proc++) {
4908     len_si[proc] = 0;
4909     if (proc == rank) {
4910       len_s[proc] = 0;
4911     } else {
4912       PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc]));
4913       PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */
4914     }
4915     if (len_s[proc]) {
4916       merge->nsend++;
4917       nrows = 0;
4918       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4919         if (ai[i + 1] > ai[i]) nrows++;
4920       }
4921       PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc]));
4922       len += len_si[proc];
4923     }
4924   }
4925 
4926   /* determine the number and length of messages to receive for ij-structure */
4927   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4928   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4929 
4930   /* post the Irecv of j-structure */
4931   PetscCall(PetscCommGetNewTag(comm, &tagj));
4932   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4933 
4934   /* post the Isend of j-structure */
4935   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4936 
4937   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
4938     if (!len_s[proc]) continue;
4939     i = owners[proc];
4940     PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4941     k++;
4942   }
4943 
4944   /* receives and sends of j-structure are complete */
4945   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4946   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4947 
4948   /* send and recv i-structure */
4949   PetscCall(PetscCommGetNewTag(comm, &tagi));
4950   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4951 
4952   PetscCall(PetscMalloc1(len + 1, &buf_s));
4953   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4954   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
4955     if (!len_s[proc]) continue;
4956     /* form outgoing message for i-structure:
4957          buf_si[0]:                 nrows to be sent
4958                [1:nrows]:           row index (global)
4959                [nrows+1:2*nrows+1]: i-structure index
4960     */
4961     nrows       = len_si[proc] / 2 - 1;
4962     buf_si_i    = buf_si + nrows + 1;
4963     buf_si[0]   = nrows;
4964     buf_si_i[0] = 0;
4965     nrows       = 0;
4966     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4967       anzi = ai[i + 1] - ai[i];
4968       if (anzi) {
4969         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4970         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4971         nrows++;
4972       }
4973     }
4974     PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4975     k++;
4976     buf_si += len_si[proc];
4977   }
4978 
4979   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4980   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4981 
4982   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4983   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4984 
4985   PetscCall(PetscFree(len_si));
4986   PetscCall(PetscFree(len_ri));
4987   PetscCall(PetscFree(rj_waits));
4988   PetscCall(PetscFree2(si_waits, sj_waits));
4989   PetscCall(PetscFree(ri_waits));
4990   PetscCall(PetscFree(buf_s));
4991   PetscCall(PetscFree(status));
4992 
4993   /* compute a local seq matrix in each processor */
4994   /* allocate bi array and free space for accumulating nonzero column info */
4995   PetscCall(PetscMalloc1(m + 1, &bi));
4996   bi[0] = 0;
4997 
4998   /* create and initialize a linked list */
4999   nlnk = N + 1;
5000   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
5001 
5002   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
5003   len = ai[owners[rank + 1]] - ai[owners[rank]];
5004   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
5005 
5006   current_space = free_space;
5007 
5008   /* determine symbolic info for each local row */
5009   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
5010 
5011   for (k = 0; k < merge->nrecv; k++) {
5012     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
5013     nrows       = *buf_ri_k[k];
5014     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
5015     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
5016   }
5017 
5018   MatPreallocateBegin(comm, m, n, dnz, onz);
5019   len = 0;
5020   for (i = 0; i < m; i++) {
5021     bnzi = 0;
5022     /* add local non-zero cols of this proc's seqmat into lnk */
5023     arow = owners[rank] + i;
5024     anzi = ai[arow + 1] - ai[arow];
5025     aj   = a->j + ai[arow];
5026     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5027     bnzi += nlnk;
5028     /* add received col data into lnk */
5029     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
5030       if (i == *nextrow[k]) {            /* i-th row */
5031         anzi = *(nextai[k] + 1) - *nextai[k];
5032         aj   = buf_rj[k] + *nextai[k];
5033         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5034         bnzi += nlnk;
5035         nextrow[k]++;
5036         nextai[k]++;
5037       }
5038     }
5039     if (len < bnzi) len = bnzi; /* =max(bnzi) */
5040 
5041     /* if free space is not available, make more free space */
5042     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
5043     /* copy data into free space, then initialize lnk */
5044     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
5045     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
5046 
5047     current_space->array += bnzi;
5048     current_space->local_used += bnzi;
5049     current_space->local_remaining -= bnzi;
5050 
5051     bi[i + 1] = bi[i] + bnzi;
5052   }
5053 
5054   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
5055 
5056   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
5057   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
5058   PetscCall(PetscLLDestroy(lnk, lnkbt));
5059 
5060   /* create symbolic parallel matrix B_mpi */
5061   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
5062   PetscCall(MatCreate(comm, &B_mpi));
5063   if (n == PETSC_DECIDE) {
5064     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
5065   } else {
5066     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
5067   }
5068   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
5069   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
5070   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5071   MatPreallocateEnd(dnz, onz);
5072   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
5073 
5074   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5075   B_mpi->assembled = PETSC_FALSE;
5076   merge->bi        = bi;
5077   merge->bj        = bj;
5078   merge->buf_ri    = buf_ri;
5079   merge->buf_rj    = buf_rj;
5080   merge->coi       = NULL;
5081   merge->coj       = NULL;
5082   merge->owners_co = NULL;
5083 
5084   PetscCall(PetscCommDestroy(&comm));
5085 
5086   /* attach the supporting struct to B_mpi for reuse */
5087   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5088   PetscCall(PetscContainerSetPointer(container, merge));
5089   PetscCall(PetscContainerSetCtxDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5090   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5091   PetscCall(PetscContainerDestroy(&container));
5092   *mpimat = B_mpi;
5093 
5094   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5095   PetscFunctionReturn(PETSC_SUCCESS);
5096 }
5097 
5098 /*@
5099   MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5100   matrices from each processor
5101 
5102   Collective
5103 
5104   Input Parameters:
+ comm   - the communicator the parallel matrix will live on
. seqmat - the input sequential matrix (one per MPI process)
5107 . m      - number of local rows (or `PETSC_DECIDE`)
5108 . n      - number of local columns (or `PETSC_DECIDE`)
5109 - scall  - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5110 
5111   Output Parameter:
5112 . mpimat - the parallel matrix generated
5113 
5114   Level: advanced
5115 
5116   Note:
5117   The dimensions of the sequential matrix in each processor MUST be the same.
5118   The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5119   destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`.
5120 
5121 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
5122 @*/
5123 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5124 {
5125   PetscMPIInt size;
5126 
5127   PetscFunctionBegin;
5128   PetscCallMPI(MPI_Comm_size(comm, &size));
5129   if (size == 1) {
5130     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5131     if (scall == MAT_INITIAL_MATRIX) {
5132       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5133     } else {
5134       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5135     }
5136     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5137     PetscFunctionReturn(PETSC_SUCCESS);
5138   }
5139   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5140   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5141   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5142   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5143   PetscFunctionReturn(PETSC_SUCCESS);
5144 }
5145 
5146 /*@
5147   MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix.
5148 
5149   Not Collective
5150 
5151   Input Parameter:
5152 . A - the matrix
5153 
5154   Output Parameter:
5155 . A_loc - the local sequential matrix generated
5156 
5157   Level: developer
5158 
5159   Notes:
  The matrix is created by taking `A`'s local rows and putting them into a sequential matrix
  with `mlocal` rows and `n` columns, where `mlocal` is the local row count obtained with
  `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.
5163 
5164   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5165 
5166   For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.
5167 
5168   Destroy the matrix with `MatDestroy()`
5169 
5170 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5171 @*/
5172 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5173 {
5174   PetscBool mpi;
5175 
5176   PetscFunctionBegin;
5177   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5178   if (mpi) {
5179     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5180   } else {
5181     *A_loc = A;
5182     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5183   }
5184   PetscFunctionReturn(PETSC_SUCCESS);
5185 }
5186 
5187 /*@
5188   MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.
5189 
5190   Not Collective
5191 
5192   Input Parameters:
5193 + A     - the matrix
5194 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5195 
5196   Output Parameter:
5197 . A_loc - the local sequential matrix generated
5198 
5199   Level: developer
5200 
5201   Notes:
5202   The matrix is created by taking all `A`'s local rows and putting them into a sequential
  matrix with `mlocal` rows and `n` columns. `mlocal` is the row count obtained with
5204   `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.
5205 
5206   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5207 
5208   When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
5209   with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
5210   then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
5211   and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.
5212 
5213 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5214 @*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray; /* cmap maps B's local column indices to global indices */
  const PetscScalar *aa, *ba, *aav, *bav;
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  /* Accept MATMPIAIJ and subtypes whose type name begins with "mpiaij" */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* On one process the diagonal block already holds the entire matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  a  = (Mat_SeqAIJ *)mpimat->A->data;
  b  = (Mat_SeqAIJ *)mpimat->B->data;
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  /* aa/ba are roving cursors over the value arrays; aav/bav are kept to restore the arrays at the end */
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* Row pointer of the merged matrix: each row holds all of A's and B's entries for that row */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    /* Merge each row in ascending global column order: B's columns left of the diagonal
       block, then A's (diagonal) columns, then B's remaining columns */
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal portion of A: columns with global index < cstart */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A: local columns shifted to global numbering */
      for (j = 0; j < ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A: remaining columns (global index past the diagonal block) */
      for (j = jo; j < ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* Structure is unchanged from the initial call: only values need refreshing, in the same merge order */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A: columns left of the diagonal block */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A: remaining columns */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5319 
5320 /*@
5321   MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5322   mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part
5323 
5324   Not Collective
5325 
5326   Input Parameters:
5327 + A     - the matrix
5328 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5329 
5330   Output Parameters:
5331 + glob  - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5332 - A_loc - the local sequential matrix generated
5333 
5334   Level: developer
5335 
5336   Note:
5337   This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal
5338   part, then those associated with the off-diagonal part (in its local ordering)
5339 
5340 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5341 @*/
5342 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5343 {
5344   Mat             Ao, Ad;
5345   const PetscInt *cmap;
5346   PetscMPIInt     size;
5347   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5348 
5349   PetscFunctionBegin;
5350   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5351   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5352   if (size == 1) {
5353     if (scall == MAT_INITIAL_MATRIX) {
5354       PetscCall(PetscObjectReference((PetscObject)Ad));
5355       *A_loc = Ad;
5356     } else if (scall == MAT_REUSE_MATRIX) {
5357       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5358     }
5359     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5360     PetscFunctionReturn(PETSC_SUCCESS);
5361   }
5362   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5363   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5364   if (f) {
5365     PetscCall((*f)(A, scall, glob, A_loc));
5366   } else {
5367     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5368     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5369     Mat_SeqAIJ        *c;
5370     PetscInt          *ai = a->i, *aj = a->j;
5371     PetscInt          *bi = b->i, *bj = b->j;
5372     PetscInt          *ci, *cj;
5373     const PetscScalar *aa, *ba;
5374     PetscScalar       *ca;
5375     PetscInt           i, j, am, dn, on;
5376 
5377     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5378     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5379     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5380     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5381     if (scall == MAT_INITIAL_MATRIX) {
5382       PetscInt k;
5383       PetscCall(PetscMalloc1(1 + am, &ci));
5384       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5385       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5386       ci[0] = 0;
5387       for (i = 0, k = 0; i < am; i++) {
5388         const PetscInt ncols_o = bi[i + 1] - bi[i];
5389         const PetscInt ncols_d = ai[i + 1] - ai[i];
5390         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5391         /* diagonal portion of A */
5392         for (j = 0; j < ncols_d; j++, k++) {
5393           cj[k] = *aj++;
5394           ca[k] = *aa++;
5395         }
5396         /* off-diagonal portion of A */
5397         for (j = 0; j < ncols_o; j++, k++) {
5398           cj[k] = dn + *bj++;
5399           ca[k] = *ba++;
5400         }
5401       }
5402       /* put together the new matrix */
5403       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5404       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5405       /* Since these are PETSc arrays, change flags to free them as necessary. */
5406       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5407       c->free_a  = PETSC_TRUE;
5408       c->free_ij = PETSC_TRUE;
5409       c->nonew   = 0;
5410       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5411     } else if (scall == MAT_REUSE_MATRIX) {
5412       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5413       for (i = 0; i < am; i++) {
5414         const PetscInt ncols_d = ai[i + 1] - ai[i];
5415         const PetscInt ncols_o = bi[i + 1] - bi[i];
5416         /* diagonal portion of A */
5417         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5418         /* off-diagonal portion of A */
5419         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5420       }
5421       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5422     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5423     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5424     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa));
5425     if (glob) {
5426       PetscInt cst, *gidx;
5427 
5428       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5429       PetscCall(PetscMalloc1(dn + on, &gidx));
5430       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5431       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5432       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5433     }
5434   }
5435   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5436   PetscFunctionReturn(PETSC_SUCCESS);
5437 }
5438 
5439 /*@C
5440   MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5441 
5442   Not Collective
5443 
5444   Input Parameters:
5445 + A     - the matrix
5446 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5447 . row   - index set of rows to extract (or `NULL`)
5448 - col   - index set of columns to extract (or `NULL`)
5449 
5450   Output Parameter:
5451 . A_loc - the local sequential matrix generated
5452 
5453   Level: developer
5454 
5455 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5456 @*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS          isrowa, iscola;
  Mat        *aloc;
  PetscBool   match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  if (!row) {
    /* Default row set: all locally owned rows */
    start = A->rmap->rstart;
    end   = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* Default column set: the owned columns plus the off-diagonal (ghost) columns,
       merged in ascending global order. a->garray is sorted, so ghost columns with
       global index < start come first, then the owned range, then the rest. */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    /* imark remembers where the ghost columns past the owned range begin */
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices() expects an array of matrices when reusing */
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  /* Only destroy the index sets this routine created; caller-supplied ones are left alone */
  if (!row) PetscCall(ISDestroy(&isrowa));
  if (!col) PetscCall(ISDestroy(&iscola));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5509 
/*
 * Create a sequential AIJ matrix based on row indices: all columns of a row are extracted
 * once the row is matched. A row could be local or remote. The routine is designed to be
 * scalable in memory so that nothing is sized by a global dimension.
 * */
static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
{
  Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth;
  PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt            owner;
  PetscSFNode           *iremote, *oiremote;
  const PetscInt        *lrowindices;
  PetscSF                sf, osf;
  PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar     *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots (locally owned rows of P)
   * nrows is the number of leaves (requested rows)
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns each requested row has */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  /* Per-row counts and prefix offsets are stored in interleaved pairs:
     slot [2*i+0] for the diagonal block, slot [2*i+1] for the off-diagonal block */
  for (i = 0; i < plocalsize; i++) {
    /* diagonal */
    nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
    /* off-diagonal */
    nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* compute offsets so that we know the relative location of each row's data */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i = 0; i < nrows; i++) {
    /* Total nonzeros for each requested row, and the widest row seen so far */
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol    = PetscMax(pnnz[i], ncol);
    /* diagonal */
    dntotalcols += nlcols[i * 2 + 0];
    /* off-diagonal */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure out the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* Build the leaf<->root maps for the per-entry data exchange:
     one SF for the diagonal-block entries, one for the off-diagonal-block entries */
  /* diagonal */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diagonal */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so ilocal needs to point into the single contiguous data array */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off-diagonal */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++]      = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* off-diagonal */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* operate on the matrix internal data to save memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for diag matrix (translated in place, undone below) */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth to store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  /* Undo the in-place global translation of po->j; every index must map back (nout == nnz) */
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse them later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5683 
5684 /*
5685  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5686  * This supports MPIAIJ and MAIJ
5687  * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;     /* rows: global rows of P to fetch; map: off-diag column of A -> local row of *P_oth */
  PetscHMapI  hamp;          /* hash map used to deduplicate the (blocked) off-diagonal column indices of A */
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;       /* star forests composed on *P_oth by MatCreateSeqSubMatrixWithRows_Private() */
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   *  and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that  a->g is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      key = a->garray[i] / dof; /* dividing by dof folds the dof columns of a MAIJ block onto one key; dof==1 for plain MPIAIJ */
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same value as the previous step (garray assumed sorted), so reuse the last slot */
        mapping[i] = count - 1;
      }
    }
    /* map owns 'mapping' (PETSC_OWN_POINTER); it translates off-diag columns of A to rows of *P_oth */
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
    PetscCall(PetscHMapIGetSize(hamp, &htsize));
    PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
    PetscCall(PetscCalloc1(htsize, &rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    /* Hash-map key order is unspecified, so sort to get the rows in ascending global order */
    PetscCall(PetscSortInt(htsize, rowindices));
    PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
    /* In case, the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that are attached to the matrix earlier (see "diagsf"/"offdiagsf" compositions).
     */
    const PetscScalar *pd_a, *po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
    PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
    /* Update values in place: both broadcasts target disjoint parts of p_oth->a, so they can be in flight together */
    PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5760 
5761 /*@C
  MatGetBrowsOfAcols - Returns an `IS` containing the rows of `B` that correspond to the nonzero columns of local `A`
5763 
5764   Collective
5765 
5766   Input Parameters:
5767 + A     - the first matrix in `MATMPIAIJ` format
5768 . B     - the second matrix in `MATMPIAIJ` format
5769 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5770 
5771   Output Parameters:
5772 + rowb  - On input index sets of rows of B to extract (or `NULL`), modified on output
5773 . colb  - On input index sets of columns of B to extract (or `NULL`), modified on output
5774 - B_seq - the sequential matrix generated
5775 
5776   Level: developer
5777 
5778 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
5779 @*/
5780 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5781 {
5782   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5783   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5784   IS          isrowb, iscolb;
5785   Mat        *bseq = NULL;
5786 
5787   PetscFunctionBegin;
5788   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5789              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5790   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5791 
5792   if (scall == MAT_INITIAL_MATRIX) {
5793     start = A->cmap->rstart;
5794     cmap  = a->garray;
5795     nzA   = a->A->cmap->n;
5796     nzB   = a->B->cmap->n;
5797     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5798     ncols = 0;
5799     for (i = 0; i < nzB; i++) { /* row < local row index */
5800       if (cmap[i] < start) idx[ncols++] = cmap[i];
5801       else break;
5802     }
5803     imark = i;
5804     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5805     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5806     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5807     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5808   } else {
5809     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5810     isrowb = *rowb;
5811     iscolb = *colb;
5812     PetscCall(PetscMalloc1(1, &bseq));
5813     bseq[0] = *B_seq;
5814   }
5815   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5816   *B_seq = bseq[0];
5817   PetscCall(PetscFree(bseq));
5818   if (!rowb) {
5819     PetscCall(ISDestroy(&isrowb));
5820   } else {
5821     *rowb = isrowb;
5822   }
5823   if (!colb) {
5824     PetscCall(ISDestroy(&iscolb));
5825   } else {
5826     *colb = iscolb;
5827   }
5828   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5829   PetscFunctionReturn(PETSC_SUCCESS);
5830 }
5831 
5832 /*
5833     MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns
5834     of the OFF-DIAGONAL portion of local A
5835 
5836     Collective
5837 
5838    Input Parameters:
5839 +    A,B - the matrices in `MATMPIAIJ` format
5840 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5841 
5842    Output Parameter:
5843 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5844 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5845 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5846 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5847 
5848     Developer Note:
5849     This directly accesses information inside the VecScatter associated with the matrix-vector product
5850      for this matrix. This is not desirable..
5851 
5852     Level: developer
5853 
5854 */
5855 
5856 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5857 {
5858   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5859   VecScatter         ctx;
5860   MPI_Comm           comm;
5861   const PetscMPIInt *rprocs, *sprocs;
5862   PetscMPIInt        nrecvs, nsends;
5863   const PetscInt    *srow, *rstarts, *sstarts;
5864   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5865   PetscInt           i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len;
5866   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5867   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5868   PetscMPIInt        size, tag, rank, nreqs;
5869 
5870   PetscFunctionBegin;
5871   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5872   PetscCallMPI(MPI_Comm_size(comm, &size));
5873 
5874   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5875              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5876   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5877   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5878 
5879   if (size == 1) {
5880     startsj_s = NULL;
5881     bufa_ptr  = NULL;
5882     *B_oth    = NULL;
5883     PetscFunctionReturn(PETSC_SUCCESS);
5884   }
5885 
5886   ctx = a->Mvctx;
5887   tag = ((PetscObject)ctx)->tag;
5888 
5889   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5890   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5891   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5892   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5893   PetscCall(PetscMalloc1(nreqs, &reqs));
5894   rwaits = reqs;
5895   swaits = PetscSafePointerPlusOffset(reqs, nrecvs);
5896 
5897   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5898   if (scall == MAT_INITIAL_MATRIX) {
5899     /* i-array */
5900     /*  post receives */
5901     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5902     for (i = 0; i < nrecvs; i++) {
5903       rowlen = rvalues + rstarts[i] * rbs;
5904       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5905       PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5906     }
5907 
5908     /* pack the outgoing message */
5909     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5910 
5911     sstartsj[0] = 0;
5912     rstartsj[0] = 0;
5913     len         = 0; /* total length of j or a array to be sent */
5914     if (nsends) {
5915       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5916       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5917     }
5918     for (i = 0; i < nsends; i++) {
5919       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5920       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5921       for (j = 0; j < nrows; j++) {
5922         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5923         for (l = 0; l < sbs; l++) {
5924           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5925 
5926           rowlen[j * sbs + l] = ncols;
5927 
5928           len += ncols;
5929           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5930         }
5931         k++;
5932       }
5933       PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5934 
5935       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5936     }
5937     /* recvs and sends of i-array are completed */
5938     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5939     PetscCall(PetscFree(svalues));
5940 
5941     /* allocate buffers for sending j and a arrays */
5942     PetscCall(PetscMalloc1(len + 1, &bufj));
5943     PetscCall(PetscMalloc1(len + 1, &bufa));
5944 
5945     /* create i-array of B_oth */
5946     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5947 
5948     b_othi[0] = 0;
5949     len       = 0; /* total length of j or a array to be received */
5950     k         = 0;
5951     for (i = 0; i < nrecvs; i++) {
5952       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5953       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5954       for (j = 0; j < nrows; j++) {
5955         b_othi[k + 1] = b_othi[k] + rowlen[j];
5956         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5957         k++;
5958       }
5959       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5960     }
5961     PetscCall(PetscFree(rvalues));
5962 
5963     /* allocate space for j and a arrays of B_oth */
5964     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5965     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5966 
5967     /* j-array */
5968     /*  post receives of j-array */
5969     for (i = 0; i < nrecvs; i++) {
5970       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5971       PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5972     }
5973 
5974     /* pack the outgoing message j-array */
5975     if (nsends) k = sstarts[0];
5976     for (i = 0; i < nsends; i++) {
5977       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5978       bufJ  = bufj + sstartsj[i];
5979       for (j = 0; j < nrows; j++) {
5980         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5981         for (ll = 0; ll < sbs; ll++) {
5982           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5983           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5984           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5985         }
5986       }
5987       PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5988     }
5989 
5990     /* recvs and sends of j-array are completed */
5991     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5992   } else if (scall == MAT_REUSE_MATRIX) {
5993     sstartsj = *startsj_s;
5994     rstartsj = *startsj_r;
5995     bufa     = *bufa_ptr;
5996     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5997   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5998 
5999   /* a-array */
6000   /*  post receives of a-array */
6001   for (i = 0; i < nrecvs; i++) {
6002     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
6003     PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
6004   }
6005 
6006   /* pack the outgoing message a-array */
6007   if (nsends) k = sstarts[0];
6008   for (i = 0; i < nsends; i++) {
6009     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
6010     bufA  = bufa + sstartsj[i];
6011     for (j = 0; j < nrows; j++) {
6012       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
6013       for (ll = 0; ll < sbs; ll++) {
6014         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
6015         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
6016         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
6017       }
6018     }
6019     PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
6020   }
6021   /* recvs and sends of a-array are completed */
6022   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
6023   PetscCall(PetscFree(reqs));
6024 
6025   if (scall == MAT_INITIAL_MATRIX) {
6026     Mat_SeqAIJ *b_oth;
6027 
6028     /* put together the new matrix */
6029     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
6030 
6031     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
6032     /* Since these are PETSc arrays, change flags to free them as necessary. */
6033     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
6034     b_oth->free_a  = PETSC_TRUE;
6035     b_oth->free_ij = PETSC_TRUE;
6036     b_oth->nonew   = 0;
6037 
6038     PetscCall(PetscFree(bufj));
6039     if (!startsj_s || !bufa_ptr) {
6040       PetscCall(PetscFree2(sstartsj, rstartsj));
6041       PetscCall(PetscFree(bufa_ptr));
6042     } else {
6043       *startsj_s = sstartsj;
6044       *startsj_r = rstartsj;
6045       *bufa_ptr  = bufa;
6046     }
6047   } else if (scall == MAT_REUSE_MATRIX) {
6048     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
6049   }
6050 
6051   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
6052   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
6053   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
6054   PetscFunctionReturn(PETSC_SUCCESS);
6055 }
6056 
6057 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
6058 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
6059 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
6060 #if defined(PETSC_HAVE_MKL_SPARSE)
6061 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
6062 #endif
6063 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
6064 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
6065 #if defined(PETSC_HAVE_ELEMENTAL)
6066 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
6067 #endif
6068 #if defined(PETSC_HAVE_SCALAPACK)
6069 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6070 #endif
6071 #if defined(PETSC_HAVE_HYPRE)
6072 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
6073 #endif
6074 #if defined(PETSC_HAVE_CUDA)
6075 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
6076 #endif
6077 #if defined(PETSC_HAVE_HIP)
6078 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
6079 #endif
6080 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6081 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6082 #endif
6083 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6084 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6085 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6086 
6087 /*
6088     Computes (B'*A')' since computing B*A directly is untenable
6089 
6090                n                       p                          p
6091         [             ]       [             ]         [                 ]
6092       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6093         [             ]       [             ]         [                 ]
6094 
6095 */
6096 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6097 {
6098   Mat At, Bt, Ct;
6099 
6100   PetscFunctionBegin;
6101   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6102   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6103   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct));
6104   PetscCall(MatDestroy(&At));
6105   PetscCall(MatDestroy(&Bt));
6106   PetscCall(MatTransposeSetPrecursor(Ct, C));
6107   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6108   PetscCall(MatDestroy(&Ct));
6109   PetscFunctionReturn(PETSC_SUCCESS);
6110 }
6111 
6112 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6113 {
6114   PetscBool cisdense;
6115 
6116   PetscFunctionBegin;
6117   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6118   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6119   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6120   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6121   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6122   PetscCall(MatSetUp(C));
6123 
6124   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6125   PetscFunctionReturn(PETSC_SUCCESS);
6126 }
6127 
6128 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6129 {
6130   Mat_Product *product = C->product;
6131   Mat          A = product->A, B = product->B;
6132 
6133   PetscFunctionBegin;
6134   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
6135              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6136   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6137   C->ops->productsymbolic = MatProductSymbolic_AB;
6138   PetscFunctionReturn(PETSC_SUCCESS);
6139 }
6140 
6141 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6142 {
6143   Mat_Product *product = C->product;
6144 
6145   PetscFunctionBegin;
6146   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6147   PetscFunctionReturn(PETSC_SUCCESS);
6148 }
6149 
6150 /*
6151    Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6152 
6153   Input Parameters:
6154 
6155     j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
6156     j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)
6157 
6158     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6159 
6160     For Set1, j1[] contains column indices of the nonzeros.
6161     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6163     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6164 
6165     Similar for Set2.
6166 
6167     This routine merges the two sets of nonzeros row by row and removes repeats.
6168 
6169   Output Parameters: (memory is allocated by the caller)
6170 
6171     i[],j[]: the CSR of the merged matrix, which has m rows.
6172     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6173     imap2[]: similar to imap1[], but for Set2.
6174     Note we order nonzeros row-by-row and from left to right.
6175 */
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m; /* Row index of mat */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Running counts of unique nonzeros consumed from Set1, Set2 and produced in the merged set, respectively */
  i[0]        = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    /* Classic two-pointer merge over the sorted (possibly repeated) column indices of row r */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) { /* Next unique nonzero comes only from Set1 */
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else { /* Next unique nonzero comes only from Set2 */
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    /* Total unique nonzeros so far becomes the CSR row pointer for row r+1 */
    PetscCall(PetscIntCast(t, i + r + 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
6233 
6234 /*
6235   Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6236 
6237   Input Parameters:
6238     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6239     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6240       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6241 
6242       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6243       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6244 
6245   Output Parameters:
6246     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6247     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6248       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6249       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6250 
6251     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6252       Atot: number of entries belonging to the diagonal block.
6253       Annz: number of unique nonzeros belonging to the diagonal block.
6254       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6255         repeats (i.e., same 'i,j' pair).
6256       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6257         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6258 
6259       Atot: number of entries belonging to the diagonal block
6260       Annz: number of unique nonzeros belonging to the diagonal block.
6261 
6262     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6263 
6264     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6265 */
static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
{
  PetscInt    cstart, cend, rstart, rend, row, col;
  PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  k, m, p, q, r, s, mid;
  PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  m = rend - rstart;

  /* Skip negative rows */
  for (k = 0; k < n; k++)
    if (i[k] >= 0) break;

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k < n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s = k; s < n; s++)
      if (i[s] != row) break;

    /* Shift diag columns to range of [-PETSC_INT_MAX, -1], so that after sorting, diag entries precede offdiag entries */
    for (p = k; p < s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX;
      /* NOTE(review): the assert admits j[p] == mat->cmap->N; a valid column index should be < N — confirm intent */
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
    }
    /* Sort the row's column indices, dragging perm[] along so value provenance is preserved */
    PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
    PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row - rstart] = k;
    rowMid[row - rstart]   = mid;
    rowEnd[row - rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag row */
    for (p = k; p < mid;) {
      col = j[p];
      do {
        j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices back to their true global values */
        p++;
      } while (p < mid && j[p] == col);
      Annz++;
    }

    /* Count unique nonzeros of this offdiag row */
    for (p = mid; p < s;) {
      col = j[p];
      do {
        p++;
      } while (p < s && j[p] == col);
      Bnnz++;
    }
    k = s;
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot, &Aperm));
  PetscCall(PetscMalloc1(Btot, &Bperm));
  PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; /* Reset the counters; they are rebuilt during the second pass */
  for (r = 0; r < m; r++) {
    k   = rowBegin[r];
    mid = rowMid[r];
    s   = rowEnd[r];
    PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
    PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p = k; p < mid;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < mid && j[p] == col);
      Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); /* p - q repeats of this unique diag nonzero */
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < s && j[p] == col);
      Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); /* p - q repeats of this unique offdiag nonzero */
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(PETSC_SUCCESS);
}
6377 
6378 /*
6379   Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6380 
6381   Input Parameters:
6382     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6383     nnz:  number of unique nonzeros in the merged matrix
6384     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6385     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6386 
6387   Output Parameter: (memory is allocated by the caller)
6388     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6389 
6390   Example:
6391     nnz1 = 4
6392     nnz  = 6
6393     imap = [1,3,4,5]
6394     jmap = [0,3,5,6,7]
6395    then,
6396     jmap_new = [0,0,3,3,5,6,7]
6397 */
6398 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6399 {
6400   PetscCount k, p;
6401 
6402   PetscFunctionBegin;
6403   jmap_new[0] = 0;
6404   p           = nnz;                /* p loops over jmap_new[] backwards */
6405   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6406     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6407   }
6408   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6409   PetscFunctionReturn(PETSC_SUCCESS);
6410 }
6411 
6412 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void **data)
6413 {
6414   MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)*data;
6415 
6416   PetscFunctionBegin;
6417   PetscCall(PetscSFDestroy(&coo->sf));
6418   PetscCall(PetscFree(coo->Aperm1));
6419   PetscCall(PetscFree(coo->Bperm1));
6420   PetscCall(PetscFree(coo->Ajmap1));
6421   PetscCall(PetscFree(coo->Bjmap1));
6422   PetscCall(PetscFree(coo->Aimap2));
6423   PetscCall(PetscFree(coo->Bimap2));
6424   PetscCall(PetscFree(coo->Aperm2));
6425   PetscCall(PetscFree(coo->Bperm2));
6426   PetscCall(PetscFree(coo->Ajmap2));
6427   PetscCall(PetscFree(coo->Bjmap2));
6428   PetscCall(PetscFree(coo->Cperm1));
6429   PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
6430   PetscCall(PetscFree(coo));
6431   PetscFunctionReturn(PETSC_SUCCESS);
6432 }
6433 
6434 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6435 {
6436   MPI_Comm             comm;
6437   PetscMPIInt          rank, size;
6438   PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6439   PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
6440   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6441   PetscContainer       container;
6442   MatCOOStruct_MPIAIJ *coo;
6443 
6444   PetscFunctionBegin;
6445   PetscCall(PetscFree(mpiaij->garray));
6446   PetscCall(VecDestroy(&mpiaij->lvec));
6447 #if defined(PETSC_USE_CTABLE)
6448   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6449 #else
6450   PetscCall(PetscFree(mpiaij->colmap));
6451 #endif
6452   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6453   mat->assembled     = PETSC_FALSE;
6454   mat->was_assembled = PETSC_FALSE;
6455 
6456   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6457   PetscCallMPI(MPI_Comm_size(comm, &size));
6458   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6459   PetscCall(PetscLayoutSetUp(mat->rmap));
6460   PetscCall(PetscLayoutSetUp(mat->cmap));
6461   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6462   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6463   PetscCall(MatGetLocalSize(mat, &m, &n));
6464   PetscCall(MatGetSize(mat, &M, &N));
6465 
6466   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6467   /* entries come first, then local rows, then remote rows.                     */
6468   PetscCount n1 = coo_n, *perm1;
6469   PetscInt  *i1 = coo_i, *j1 = coo_j;
6470 
6471   PetscCall(PetscMalloc1(n1, &perm1));
6472   for (k = 0; k < n1; k++) perm1[k] = k;
6473 
6474   /* Manipulate indices so that entries with negative row or col indices will have smallest
6475      row indices, local entries will have greater but negative row indices, and remote entries
6476      will have positive row indices.
6477   */
6478   for (k = 0; k < n1; k++) {
6479     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN;                /* e.g., -2^31, minimal to move them ahead */
6480     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */
6481     else {
6482       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6483       if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */
6484     }
6485   }
6486 
6487   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6488   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6489 
6490   /* Advance k to the first entry we need to take care of */
6491   for (k = 0; k < n1; k++)
6492     if (i1[k] > PETSC_INT_MIN) break;
6493   PetscCount i1start = k;
6494 
6495   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */
6496   for (; k < rem; k++) i1[k] += PETSC_INT_MAX;                                    /* Revert row indices of local rows*/
6497 
6498   /*           Send remote rows to their owner                                  */
6499   /* Find which rows should be sent to which remote ranks*/
6500   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6501   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6502   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6503   const PetscInt *ranges;
6504   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6505 
6506   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6507   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6508   for (k = rem; k < n1;) {
6509     PetscMPIInt owner;
6510     PetscInt    firstRow, lastRow;
6511 
6512     /* Locate a row range */
6513     firstRow = i1[k]; /* first row of this owner */
6514     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6515     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6516 
6517     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6518     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6519 
6520     /* All entries in [k,p) belong to this remote owner */
6521     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6522       PetscMPIInt *sendto2;
6523       PetscInt    *nentries2;
6524       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6525 
6526       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6527       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6528       PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1));
6529       PetscCall(PetscFree2(sendto, nentries2));
6530       sendto   = sendto2;
6531       nentries = nentries2;
6532       maxNsend = maxNsend2;
6533     }
6534     sendto[nsend] = owner;
6535     PetscCall(PetscIntCast(p - k, &nentries[nsend]));
6536     nsend++;
6537     k = p;
6538   }
6539 
6540   /* Build 1st SF to know offsets on remote to send data */
6541   PetscSF      sf1;
6542   PetscInt     nroots = 1, nroots2 = 0;
6543   PetscInt     nleaves = nsend, nleaves2 = 0;
6544   PetscInt    *offsets;
6545   PetscSFNode *iremote;
6546 
6547   PetscCall(PetscSFCreate(comm, &sf1));
6548   PetscCall(PetscMalloc1(nsend, &iremote));
6549   PetscCall(PetscMalloc1(nsend, &offsets));
6550   for (k = 0; k < nsend; k++) {
6551     iremote[k].rank  = sendto[k];
6552     iremote[k].index = 0;
6553     nleaves2 += nentries[k];
6554     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6555   }
6556   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6557   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6558   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6559   PetscCall(PetscSFDestroy(&sf1));
6560   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem);
6561 
6562   /* Build 2nd SF to send remote COOs to their owner */
6563   PetscSF sf2;
6564   nroots  = nroots2;
6565   nleaves = nleaves2;
6566   PetscCall(PetscSFCreate(comm, &sf2));
6567   PetscCall(PetscSFSetFromOptions(sf2));
6568   PetscCall(PetscMalloc1(nleaves, &iremote));
6569   p = 0;
6570   for (k = 0; k < nsend; k++) {
6571     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6572     for (q = 0; q < nentries[k]; q++, p++) {
6573       iremote[p].rank = sendto[k];
6574       PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index));
6575     }
6576   }
6577   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6578 
6579   /* Send the remote COOs to their owner */
6580   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6581   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6582   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6583   PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6584   PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6585   PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem);
6586   PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem);
6587   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6588   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE));
6589   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6590   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE));
6591 
6592   PetscCall(PetscFree(offsets));
6593   PetscCall(PetscFree2(sendto, nentries));
6594 
6595   /* Sort received COOs by row along with the permutation array     */
6596   for (k = 0; k < n2; k++) perm2[k] = k;
6597   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6598 
6599   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6600   PetscCount *Cperm1;
6601   PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6602   PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem);
6603   PetscCall(PetscMalloc1(nleaves, &Cperm1));
6604   PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves));
6605 
6606   /* Support for HYPRE matrices, kind of a hack.
6607      Swap min column with diagonal so that diagonal values will go first */
6608   PetscBool hypre;
6609   PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre));
6610   if (hypre) {
6611     PetscInt *minj;
6612     PetscBT   hasdiag;
6613 
6614     PetscCall(PetscBTCreate(m, &hasdiag));
6615     PetscCall(PetscMalloc1(m, &minj));
6616     for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX;
6617     for (k = i1start; k < rem; k++) {
6618       if (j1[k] < cstart || j1[k] >= cend) continue;
6619       const PetscInt rindex = i1[k] - rstart;
6620       if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6621       minj[rindex] = PetscMin(minj[rindex], j1[k]);
6622     }
6623     for (k = 0; k < n2; k++) {
6624       if (j2[k] < cstart || j2[k] >= cend) continue;
6625       const PetscInt rindex = i2[k] - rstart;
6626       if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6627       minj[rindex] = PetscMin(minj[rindex], j2[k]);
6628     }
6629     for (k = i1start; k < rem; k++) {
6630       const PetscInt rindex = i1[k] - rstart;
6631       if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6632       if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart);
6633       else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex];
6634     }
6635     for (k = 0; k < n2; k++) {
6636       const PetscInt rindex = i2[k] - rstart;
6637       if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6638       if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart);
6639       else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex];
6640     }
6641     PetscCall(PetscBTDestroy(&hasdiag));
6642     PetscCall(PetscFree(minj));
6643   }
6644 
6645   /* Split local COOs and received COOs into diag/offdiag portions */
6646   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6647   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1;
6648   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6649   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6650   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6651   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6652 
6653   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6654   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6655   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6656   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6657 
6658   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6659   PetscInt *Ai, *Bi;
6660   PetscInt *Aj, *Bj;
6661 
6662   PetscCall(PetscMalloc1(m + 1, &Ai));
6663   PetscCall(PetscMalloc1(m + 1, &Bi));
6664   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6665   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6666 
6667   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6668   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6669   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6670   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6671   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6672 
6673   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6674   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6675 
6676   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6677   /* expect nonzeros in A/B most likely have local contributing entries        */
6678   PetscInt    Annz = Ai[m];
6679   PetscInt    Bnnz = Bi[m];
6680   PetscCount *Ajmap1_new, *Bjmap1_new;
6681 
6682   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6683   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6684 
6685   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6686   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6687 
6688   PetscCall(PetscFree(Aimap1));
6689   PetscCall(PetscFree(Ajmap1));
6690   PetscCall(PetscFree(Bimap1));
6691   PetscCall(PetscFree(Bjmap1));
6692   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6693   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6694   PetscCall(PetscFree(perm1));
6695   PetscCall(PetscFree3(i2, j2, perm2));
6696 
6697   Ajmap1 = Ajmap1_new;
6698   Bjmap1 = Bjmap1_new;
6699 
6700   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6701   if (Annz < Annz1 + Annz2) {
6702     PetscInt *Aj_new;
6703     PetscCall(PetscMalloc1(Annz, &Aj_new));
6704     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6705     PetscCall(PetscFree(Aj));
6706     Aj = Aj_new;
6707   }
6708 
6709   if (Bnnz < Bnnz1 + Bnnz2) {
6710     PetscInt *Bj_new;
6711     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6712     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6713     PetscCall(PetscFree(Bj));
6714     Bj = Bj_new;
6715   }
6716 
6717   /* Create new submatrices for on-process and off-process coupling                  */
6718   PetscScalar     *Aa, *Ba;
6719   MatType          rtype;
6720   Mat_SeqAIJ      *a, *b;
6721   PetscObjectState state;
6722   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6723   PetscCall(PetscCalloc1(Bnnz, &Ba));
6724   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6725   if (cstart) {
6726     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6727   }
6728 
6729   PetscCall(MatGetRootType_Private(mat, &rtype));
6730 
6731   MatSeqXAIJGetOptions_Private(mpiaij->A);
6732   PetscCall(MatDestroy(&mpiaij->A));
6733   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6734   PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat));
6735   MatSeqXAIJRestoreOptions_Private(mpiaij->A);
6736 
6737   MatSeqXAIJGetOptions_Private(mpiaij->B);
6738   PetscCall(MatDestroy(&mpiaij->B));
6739   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6740   PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat));
6741   MatSeqXAIJRestoreOptions_Private(mpiaij->B);
6742 
6743   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6744   mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ
6745   state              = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate;
6746   PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
6747 
6748   a          = (Mat_SeqAIJ *)mpiaij->A->data;
6749   b          = (Mat_SeqAIJ *)mpiaij->B->data;
6750   a->free_a  = PETSC_TRUE;
6751   a->free_ij = PETSC_TRUE;
6752   b->free_a  = PETSC_TRUE;
6753   b->free_ij = PETSC_TRUE;
6754   a->maxnz   = a->nz;
6755   b->maxnz   = b->nz;
6756 
6757   /* conversion must happen AFTER multiply setup */
6758   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6759   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6760   PetscCall(VecDestroy(&mpiaij->lvec));
6761   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6762 
6763   // Put the COO struct in a container and then attach that to the matrix
6764   PetscCall(PetscMalloc1(1, &coo));
6765   coo->n       = coo_n;
6766   coo->sf      = sf2;
6767   coo->sendlen = nleaves;
6768   coo->recvlen = nroots;
6769   coo->Annz    = Annz;
6770   coo->Bnnz    = Bnnz;
6771   coo->Annz2   = Annz2;
6772   coo->Bnnz2   = Bnnz2;
6773   coo->Atot1   = Atot1;
6774   coo->Atot2   = Atot2;
6775   coo->Btot1   = Btot1;
6776   coo->Btot2   = Btot2;
6777   coo->Ajmap1  = Ajmap1;
6778   coo->Aperm1  = Aperm1;
6779   coo->Bjmap1  = Bjmap1;
6780   coo->Bperm1  = Bperm1;
6781   coo->Aimap2  = Aimap2;
6782   coo->Ajmap2  = Ajmap2;
6783   coo->Aperm2  = Aperm2;
6784   coo->Bimap2  = Bimap2;
6785   coo->Bjmap2  = Bjmap2;
6786   coo->Bperm2  = Bperm2;
6787   coo->Cperm1  = Cperm1;
6788   // Allocate in preallocation. If not used, it has zero cost on host
6789   PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
6790   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
6791   PetscCall(PetscContainerSetPointer(container, coo));
6792   PetscCall(PetscContainerSetCtxDestroy(container, MatCOOStructDestroy_MPIAIJ));
6793   PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
6794   PetscCall(PetscContainerDestroy(&container));
6795   PetscFunctionReturn(PETSC_SUCCESS);
6796 }
6797 
6798 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6799 {
6800   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6801   Mat                  A = mpiaij->A, B = mpiaij->B;
6802   PetscScalar         *Aa, *Ba;
6803   PetscScalar         *sendbuf, *recvbuf;
6804   const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
6805   const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
6806   const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
6807   const PetscCount    *Cperm1;
6808   PetscContainer       container;
6809   MatCOOStruct_MPIAIJ *coo;
6810 
6811   PetscFunctionBegin;
6812   PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
6813   PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix");
6814   PetscCall(PetscContainerGetPointer(container, (void **)&coo));
6815   sendbuf = coo->sendbuf;
6816   recvbuf = coo->recvbuf;
6817   Ajmap1  = coo->Ajmap1;
6818   Ajmap2  = coo->Ajmap2;
6819   Aimap2  = coo->Aimap2;
6820   Bjmap1  = coo->Bjmap1;
6821   Bjmap2  = coo->Bjmap2;
6822   Bimap2  = coo->Bimap2;
6823   Aperm1  = coo->Aperm1;
6824   Aperm2  = coo->Aperm2;
6825   Bperm1  = coo->Bperm1;
6826   Bperm2  = coo->Bperm2;
6827   Cperm1  = coo->Cperm1;
6828 
6829   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6830   PetscCall(MatSeqAIJGetArray(B, &Ba));
6831 
6832   /* Pack entries to be sent to remote */
6833   for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6834 
6835   /* Send remote entries to their owner and overlap the communication with local computation */
6836   PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6837   /* Add local entries to A and B */
6838   for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6839     PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
6840     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6841     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6842   }
6843   for (PetscCount i = 0; i < coo->Bnnz; i++) {
6844     PetscScalar sum = 0.0;
6845     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6846     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6847   }
6848   PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6849 
6850   /* Add received remote entries to A and B */
6851   for (PetscCount i = 0; i < coo->Annz2; i++) {
6852     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6853   }
6854   for (PetscCount i = 0; i < coo->Bnnz2; i++) {
6855     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6856   }
6857   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6858   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6859   PetscFunctionReturn(PETSC_SUCCESS);
6860 }
6861 
6862 /*MC
6863    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6864 
6865    Options Database Keys:
6866 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6867 
6868    Level: beginner
6869 
6870    Notes:
6871    `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values,
6872     in this case the values associated with the rows and columns one passes in are set to zero
6873     in the matrix
6874 
    `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
6877 
6878 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6879 M*/
/* MatCreate_MPIAIJ - Constructor for MATMPIAIJ: allocates the Mat_MPIAIJ data
   structure, installs the MatOps function table, and registers all composed
   methods (preallocation, format conversions, COO assembly, etc.) on the object. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  PetscCall(PetscNew(&b));
  B->data       = (void *)b;
  B->ops[0]     = MatOps_Values; /* struct assignment copies the whole function table */
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL; /* global-to-local column map for B; built lazily */
  b->garray      = NULL; /* global column indices of the off-diagonal part; built at assembly */
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* Register composed methods; name strings are the dispatch keys used by PetscTryMethod()/PetscUseMethod() */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetHash_C", MatResetHash_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(PETSC_SUCCESS);
}
6963 
6964 /*@
6965   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6966   and "off-diagonal" part of the matrix in CSR format.
6967 
6968   Collective
6969 
6970   Input Parameters:
6971 + comm - MPI communicator
6972 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
6973 . n    - This value should be the same as the local size used in creating the
6974          x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have
6975          calculated if `N` is given) For square matrices `n` is almost always `m`.
6976 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
6977 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
6978 . i    - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6979 . j    - column indices, which must be local, i.e., based off the start column of the diagonal portion
6980 . a    - matrix values
6981 . oi   - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6982 . oj   - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
6983 - oa   - matrix values
6984 
6985   Output Parameter:
6986 . mat - the matrix
6987 
6988   Level: advanced
6989 
6990   Notes:
6991   The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user
6992   must free the arrays once the matrix has been destroyed and not before.
6993 
6994   The `i` and `j` indices are 0 based
6995 
6996   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix
6997 
6998   This sets local rows and cannot be used to set off-processor values.
6999 
7000   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
7001   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
7002   not easily support in-place reassembly. It is recommended to use `MatSetValues()` (or a variant thereof) because
7003   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
7004   keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
7005   communication if it is known that only local entries will be set.
7006 
7007 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
7008           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
7009 @*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
{
  Mat_MPIAIJ *maij;

  PetscFunctionBegin;
  /* m is the local row count of both the diagonal (A) and off-diagonal (B) blocks; it cannot be deferred to PETSc */
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  /* both CSR row-pointer arrays must be 0-based; only the first entry is cheap to validate here */
  PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  /* no preallocation pass is needed: the sequential blocks are built directly on the user's arrays below */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* A holds the "diagonal" block (local column ids); B holds the "off-diagonal" block whose
     column indices are global, hence its column dimension is the full global column count */
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));

  /* all entries are local by construction, so disable off-process communication for this assembly,
     then restore the default afterwards */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  /* the user owns the arrays, so the nonzero pattern must stay fixed; error on new locations */
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7038 
/* Backend data shared by the symbolic and numeric phases of MatProduct (AB, AtB, PtAP)
   for MPIAIJ-like matrices; stored in C->product->data and freed by MatDestroy_MatMatMPIAIJBACKEND() */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt   cp;    /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r;
  PetscScalar *bufa;
  Mat          P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i]; own[0] owns the single shared array */
  PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i]; off[0] owns the single shared array */
  PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;            /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;         /* memory type of the COO buffers (host/CUDA/HIP/Kokkos) */

  /* customization */
  PetscBool abmerge;    /* for AB: multiply A's diagonal block against a merged local B */
  PetscBool P_oth_bind; /* bind P_oth to the CPU */
} MatMatMPIAIJBACKEND;
7069 
7070 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
7071 {
7072   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
7073   PetscInt             i;
7074 
7075   PetscFunctionBegin;
7076   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
7077   PetscCall(PetscFree(mmdata->bufa));
7078   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
7079   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
7080   PetscCall(MatDestroy(&mmdata->P_oth));
7081   PetscCall(MatDestroy(&mmdata->Bloc));
7082   PetscCall(PetscSFDestroy(&mmdata->sf));
7083   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
7084   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
7085   PetscCall(PetscFree(mmdata->own[0]));
7086   PetscCall(PetscFree(mmdata->own));
7087   PetscCall(PetscFree(mmdata->off[0]));
7088   PetscCall(PetscFree(mmdata->off));
7089   PetscCall(PetscFree(mmdata));
7090   PetscFunctionReturn(PETSC_SUCCESS);
7091 }
7092 
7093 /* Copy selected n entries with indices in idx[] of A to v[].
7094    If idx is NULL, copy the whole data array of A to v[]
7095  */
7096 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
7097 {
7098   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
7099 
7100   PetscFunctionBegin;
7101   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7102   if (f) {
7103     PetscCall((*f)(A, n, idx, v));
7104   } else {
7105     const PetscScalar *vv;
7106 
7107     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7108     if (n && idx) {
7109       PetscScalar    *w  = v;
7110       const PetscInt *oi = idx;
7111       PetscInt        j;
7112 
7113       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7114     } else {
7115       PetscCall(PetscArraycpy(v, vv, n));
7116     }
7117     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7118   }
7119   PetscFunctionReturn(PETSC_SUCCESS);
7120 }
7121 
/* Numeric phase of the backend MatProduct: refresh helper matrices, run the numeric
   phase of each intermediate product, scatter their values into the COO buffers, and
   insert them into C via MatSetValuesCOO(). */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o; /* n_d/n_o: running offsets into coo_v (on-process) and coo_w (off-process) */

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  /* the symbolic-phase values can be reused only once (see reusesym in MatMatMPIAIJBACKEND) */
  mmdata->reusesym = PETSC_FALSE;

  /* run the numeric phase of every intermediate product, temporary ones included */
  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* pack values: off[i]/own[i] delimit, per product, the indices destined off-process and on-process */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff;

    PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff));
    if (mmdata->mptmp[i]) continue; /* temporary products only feed later products; their values are not inserted */
    if (noff) {
      PetscInt nown;

      /* split this product's values between the MPI send buffer (coo_w) and the local buffer (coo_v) */
      PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      /* no off-process entries: copy the whole value array of this product */
      Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    /* values received from other ranks land right after the locally produced ones in coo_v */
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
  }
  PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7168 
7169 /* Support for Pt * A, A * P, or Pt * A * P */
7170 #define MAX_NUMBER_INTERMEDIATE 4
7171 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
7172 {
7173   Mat_Product           *product = C->product;
7174   Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
7175   Mat_MPIAIJ            *a, *p;
7176   MatMatMPIAIJBACKEND   *mmdata;
7177   ISLocalToGlobalMapping P_oth_l2g = NULL;
7178   IS                     glob      = NULL;
7179   const char            *prefix;
7180   char                   pprefix[256];
7181   const PetscInt        *globidx, *P_oth_idx;
7182   PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
7183   PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
7184   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
7185                                                                                          /* type-0: consecutive, start from 0; type-1: consecutive with */
7186                                                                                          /* a base offset; type-2: sparse with a local to global map table */
7187   const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */
7188 
7189   MatProductType ptype;
7190   PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
7191   PetscMPIInt    size;
7192 
7193   PetscFunctionBegin;
7194   MatCheckProduct(C, 1);
7195   PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
7196   ptype = product->type;
7197   if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
7198     ptype                                          = MATPRODUCT_AB;
7199     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
7200   }
7201   switch (ptype) {
7202   case MATPRODUCT_AB:
7203     A          = product->A;
7204     P          = product->B;
7205     m          = A->rmap->n;
7206     n          = P->cmap->n;
7207     M          = A->rmap->N;
7208     N          = P->cmap->N;
7209     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
7210     break;
7211   case MATPRODUCT_AtB:
7212     P          = product->A;
7213     A          = product->B;
7214     m          = P->cmap->n;
7215     n          = A->cmap->n;
7216     M          = P->cmap->N;
7217     N          = A->cmap->N;
7218     hasoffproc = PETSC_TRUE;
7219     break;
7220   case MATPRODUCT_PtAP:
7221     A          = product->A;
7222     P          = product->B;
7223     m          = P->cmap->n;
7224     n          = P->cmap->n;
7225     M          = P->cmap->N;
7226     N          = P->cmap->N;
7227     hasoffproc = PETSC_TRUE;
7228     break;
7229   default:
7230     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7231   }
7232   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
7233   if (size == 1) hasoffproc = PETSC_FALSE;
7234 
7235   /* defaults */
7236   for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
7237     mp[i]    = NULL;
7238     mptmp[i] = PETSC_FALSE;
7239     rmapt[i] = -1;
7240     cmapt[i] = -1;
7241     rmapa[i] = NULL;
7242     cmapa[i] = NULL;
7243   }
7244 
7245   /* customization */
7246   PetscCall(PetscNew(&mmdata));
7247   mmdata->reusesym = product->api_user;
7248   if (ptype == MATPRODUCT_AB) {
7249     if (product->api_user) {
7250       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
7251       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7252       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7253       PetscOptionsEnd();
7254     } else {
7255       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
7256       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7257       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7258       PetscOptionsEnd();
7259     }
7260   } else if (ptype == MATPRODUCT_PtAP) {
7261     if (product->api_user) {
7262       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
7263       PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7264       PetscOptionsEnd();
7265     } else {
7266       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
7267       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7268       PetscOptionsEnd();
7269     }
7270   }
7271   a = (Mat_MPIAIJ *)A->data;
7272   p = (Mat_MPIAIJ *)P->data;
7273   PetscCall(MatSetSizes(C, m, n, M, N));
7274   PetscCall(PetscLayoutSetUp(C->rmap));
7275   PetscCall(PetscLayoutSetUp(C->cmap));
7276   PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
7277   PetscCall(MatGetOptionsPrefix(C, &prefix));
7278 
7279   cp = 0;
7280   switch (ptype) {
7281   case MATPRODUCT_AB: /* A * P */
7282     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7283 
7284     /* A_diag * P_local (merged or not) */
7285     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7286       /* P is product->B */
7287       PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7288       PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7289       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7290       PetscCall(MatProductSetFill(mp[cp], product->fill));
7291       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7292       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7293       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7294       mp[cp]->product->api_user = product->api_user;
7295       PetscCall(MatProductSetFromOptions(mp[cp]));
7296       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7297       PetscCall(ISGetIndices(glob, &globidx));
7298       rmapt[cp] = 1;
7299       cmapt[cp] = 2;
7300       cmapa[cp] = globidx;
7301       mptmp[cp] = PETSC_FALSE;
7302       cp++;
7303     } else { /* A_diag * P_diag and A_diag * P_off */
7304       PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
7305       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7306       PetscCall(MatProductSetFill(mp[cp], product->fill));
7307       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7308       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7309       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7310       mp[cp]->product->api_user = product->api_user;
7311       PetscCall(MatProductSetFromOptions(mp[cp]));
7312       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7313       rmapt[cp] = 1;
7314       cmapt[cp] = 1;
7315       mptmp[cp] = PETSC_FALSE;
7316       cp++;
7317       PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
7318       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7319       PetscCall(MatProductSetFill(mp[cp], product->fill));
7320       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7321       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7322       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7323       mp[cp]->product->api_user = product->api_user;
7324       PetscCall(MatProductSetFromOptions(mp[cp]));
7325       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7326       rmapt[cp] = 1;
7327       cmapt[cp] = 2;
7328       cmapa[cp] = p->garray;
7329       mptmp[cp] = PETSC_FALSE;
7330       cp++;
7331     }
7332 
7333     /* A_off * P_other */
7334     if (mmdata->P_oth) {
7335       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
7336       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7337       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7338       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7339       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7340       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7341       PetscCall(MatProductSetFill(mp[cp], product->fill));
7342       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7343       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7344       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7345       mp[cp]->product->api_user = product->api_user;
7346       PetscCall(MatProductSetFromOptions(mp[cp]));
7347       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7348       rmapt[cp] = 1;
7349       cmapt[cp] = 2;
7350       cmapa[cp] = P_oth_idx;
7351       mptmp[cp] = PETSC_FALSE;
7352       cp++;
7353     }
7354     break;
7355 
7356   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7357     /* A is product->B */
7358     PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7359     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7360       PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
7361       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7362       PetscCall(MatProductSetFill(mp[cp], product->fill));
7363       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7364       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7365       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7366       mp[cp]->product->api_user = product->api_user;
7367       PetscCall(MatProductSetFromOptions(mp[cp]));
7368       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7369       PetscCall(ISGetIndices(glob, &globidx));
7370       rmapt[cp] = 2;
7371       rmapa[cp] = globidx;
7372       cmapt[cp] = 2;
7373       cmapa[cp] = globidx;
7374       mptmp[cp] = PETSC_FALSE;
7375       cp++;
7376     } else {
7377       PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
7378       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7379       PetscCall(MatProductSetFill(mp[cp], product->fill));
7380       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7381       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7382       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7383       mp[cp]->product->api_user = product->api_user;
7384       PetscCall(MatProductSetFromOptions(mp[cp]));
7385       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7386       PetscCall(ISGetIndices(glob, &globidx));
7387       rmapt[cp] = 1;
7388       cmapt[cp] = 2;
7389       cmapa[cp] = globidx;
7390       mptmp[cp] = PETSC_FALSE;
7391       cp++;
7392       PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
7393       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7394       PetscCall(MatProductSetFill(mp[cp], product->fill));
7395       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7396       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7397       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7398       mp[cp]->product->api_user = product->api_user;
7399       PetscCall(MatProductSetFromOptions(mp[cp]));
7400       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7401       rmapt[cp] = 2;
7402       rmapa[cp] = p->garray;
7403       cmapt[cp] = 2;
7404       cmapa[cp] = globidx;
7405       mptmp[cp] = PETSC_FALSE;
7406       cp++;
7407     }
7408     break;
7409   case MATPRODUCT_PtAP:
7410     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7411     /* P is product->B */
7412     PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7413     PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7414     PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
7415     PetscCall(MatProductSetFill(mp[cp], product->fill));
7416     PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7417     PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7418     PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7419     mp[cp]->product->api_user = product->api_user;
7420     PetscCall(MatProductSetFromOptions(mp[cp]));
7421     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7422     PetscCall(ISGetIndices(glob, &globidx));
7423     rmapt[cp] = 2;
7424     rmapa[cp] = globidx;
7425     cmapt[cp] = 2;
7426     cmapa[cp] = globidx;
7427     mptmp[cp] = PETSC_FALSE;
7428     cp++;
7429     if (mmdata->P_oth) {
7430       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
7431       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7432       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7433       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7434       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7435       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7436       PetscCall(MatProductSetFill(mp[cp], product->fill));
7437       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7438       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7439       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7440       mp[cp]->product->api_user = product->api_user;
7441       PetscCall(MatProductSetFromOptions(mp[cp]));
7442       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7443       mptmp[cp] = PETSC_TRUE;
7444       cp++;
7445       PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
7446       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7447       PetscCall(MatProductSetFill(mp[cp], product->fill));
7448       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7449       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7450       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7451       mp[cp]->product->api_user = product->api_user;
7452       PetscCall(MatProductSetFromOptions(mp[cp]));
7453       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7454       rmapt[cp] = 2;
7455       rmapa[cp] = globidx;
7456       cmapt[cp] = 2;
7457       cmapa[cp] = P_oth_idx;
7458       mptmp[cp] = PETSC_FALSE;
7459       cp++;
7460     }
7461     break;
7462   default:
7463     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7464   }
7465   /* sanity check */
7466   if (size > 1)
7467     for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);
7468 
7469   PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
7470   for (i = 0; i < cp; i++) {
7471     mmdata->mp[i]    = mp[i];
7472     mmdata->mptmp[i] = mptmp[i];
7473   }
7474   mmdata->cp             = cp;
7475   C->product->data       = mmdata;
7476   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7477   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7478 
7479   /* memory type */
7480   mmdata->mtype = PETSC_MEMTYPE_HOST;
7481   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
7482   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
7483   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
7484   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7485   else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
7486   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7487 
7488   /* prepare coo coordinates for values insertion */
7489 
7490   /* count total nonzeros of those intermediate seqaij Mats
7491     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7492     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7493     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7494   */
7495   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7496     Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
7497     if (mptmp[cp]) continue;
7498     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
7499       const PetscInt *rmap = rmapa[cp];
7500       const PetscInt  mr   = mp[cp]->rmap->n;
7501       const PetscInt  rs   = C->rmap->rstart;
7502       const PetscInt  re   = C->rmap->rend;
7503       const PetscInt *ii   = mm->i;
7504       for (i = 0; i < mr; i++) {
7505         const PetscInt gr = rmap[i];
7506         const PetscInt nz = ii[i + 1] - ii[i];
7507         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7508         else ncoo_oown += nz;                  /* this row is local */
7509       }
7510     } else ncoo_d += mm->nz;
7511   }
7512 
7513   /*
7514     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7515 
7516     ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.
7517 
7518     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].
7519 
7520     off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
7521     own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
7522     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7523 
7524     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7525     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
7526   */
7527   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
7528   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));
7529 
7530   /* gather (i,j) of nonzeros inserted by remote procs */
7531   if (hasoffproc) {
7532     PetscSF  msf;
7533     PetscInt ncoo2, *coo_i2, *coo_j2;
7534 
7535     PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
7536     PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
7537     PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */
7538 
7539     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7540       Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
7541       PetscInt   *idxoff = mmdata->off[cp];
7542       PetscInt   *idxown = mmdata->own[cp];
7543       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7544         const PetscInt *rmap = rmapa[cp];
7545         const PetscInt *cmap = cmapa[cp];
7546         const PetscInt *ii   = mm->i;
7547         PetscInt       *coi  = coo_i + ncoo_o;
7548         PetscInt       *coj  = coo_j + ncoo_o;
7549         const PetscInt  mr   = mp[cp]->rmap->n;
7550         const PetscInt  rs   = C->rmap->rstart;
7551         const PetscInt  re   = C->rmap->rend;
7552         const PetscInt  cs   = C->cmap->rstart;
7553         for (i = 0; i < mr; i++) {
7554           const PetscInt *jj = mm->j + ii[i];
7555           const PetscInt  gr = rmap[i];
7556           const PetscInt  nz = ii[i + 1] - ii[i];
7557           if (gr < rs || gr >= re) { /* this is an offproc row */
7558             for (j = ii[i]; j < ii[i + 1]; j++) {
7559               *coi++    = gr;
7560               *idxoff++ = j;
7561             }
7562             if (!cmapt[cp]) { /* already global */
7563               for (j = 0; j < nz; j++) *coj++ = jj[j];
7564             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7565               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7566             } else { /* offdiag */
7567               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7568             }
7569             ncoo_o += nz;
7570           } else { /* this is a local row */
7571             for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
7572           }
7573         }
7574       }
7575       mmdata->off[cp + 1] = idxoff;
7576       mmdata->own[cp + 1] = idxown;
7577     }
7578 
7579     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7580     PetscInt incoo_o;
7581     PetscCall(PetscIntCast(ncoo_o, &incoo_o));
7582     PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
7583     PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
7584     PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
7585     ncoo = ncoo_d + ncoo_oown + ncoo2;
7586     PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
7587     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7588     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
7589     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7590     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7591     PetscCall(PetscFree2(coo_i, coo_j));
7592     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7593     PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
7594     coo_i = coo_i2;
7595     coo_j = coo_j2;
7596   } else { /* no offproc values insertion */
7597     ncoo = ncoo_d;
7598     PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));
7599 
7600     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7601     PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
7602     PetscCall(PetscSFSetUp(mmdata->sf));
7603   }
7604   mmdata->hasoffproc = hasoffproc;
7605 
7606   /* gather (i,j) of nonzeros inserted locally */
7607   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7608     Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
7609     PetscInt       *coi  = coo_i + ncoo_d;
7610     PetscInt       *coj  = coo_j + ncoo_d;
7611     const PetscInt *jj   = mm->j;
7612     const PetscInt *ii   = mm->i;
7613     const PetscInt *cmap = cmapa[cp];
7614     const PetscInt *rmap = rmapa[cp];
7615     const PetscInt  mr   = mp[cp]->rmap->n;
7616     const PetscInt  rs   = C->rmap->rstart;
7617     const PetscInt  re   = C->rmap->rend;
7618     const PetscInt  cs   = C->cmap->rstart;
7619 
7620     if (mptmp[cp]) continue;
7621     if (rmapt[cp] == 1) { /* consecutive rows */
7622       /* fill coo_i */
7623       for (i = 0; i < mr; i++) {
7624         const PetscInt gr = i + rs;
7625         for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
7626       }
7627       /* fill coo_j */
7628       if (!cmapt[cp]) { /* type-0, already global */
7629         PetscCall(PetscArraycpy(coj, jj, mm->nz));
7630       } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
7631         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7632       } else {                                            /* type-2, local to global for sparse columns */
7633         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7634       }
7635       ncoo_d += mm->nz;
7636     } else if (rmapt[cp] == 2) { /* sparse rows */
7637       for (i = 0; i < mr; i++) {
7638         const PetscInt *jj = mm->j + ii[i];
7639         const PetscInt  gr = rmap[i];
7640         const PetscInt  nz = ii[i + 1] - ii[i];
7641         if (gr >= rs && gr < re) { /* local rows */
7642           for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
7643           if (!cmapt[cp]) { /* type-0, already global */
7644             for (j = 0; j < nz; j++) *coj++ = jj[j];
7645           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7646             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7647           } else { /* type-2, local to global for sparse columns */
7648             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7649           }
7650           ncoo_d += nz;
7651         }
7652       }
7653     }
7654   }
7655   if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
7656   PetscCall(ISDestroy(&glob));
7657   if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
7658   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7659   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7660   PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));
7661 
7662   /* preallocate with COO data */
7663   PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
7664   PetscCall(PetscFree2(coo_i, coo_j));
7665   PetscFunctionReturn(PETSC_SUCCESS);
7666 }
7667 
/* MatProductSetFromOptions_MPIAIJBACKEND - select the backend (device) symbolic routine for
   AB, AtB and PtAP products, honoring runtime options that request a fallback to the CPU
   implementation. Without device support compiled in, `match` starts PETSC_TRUE and the
   backend symbolic routine is installed unconditionally for the supported product types. */
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match  = PETSC_FALSE;
  PetscBool usecpu = PETSC_FALSE;
#else
  PetscBool match = PETSC_TRUE;
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat, 1);
#if defined(PETSC_HAVE_DEVICE)
  /* only use the backend path when both operands live on the device and share the same type */
  if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
  if (match) { /* we can always fallback to the CPU if requested */
    /* each product type exposes two option names: one for the classic API (MatMatMult etc.)
       and one for the MatProduct API */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu;
  }
#endif
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7738 
7739 /*
7740    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7741 
7742    n - the number of block indices in cc[]
7743    cc - the block indices (must be large enough to contain the indices)
7744 */
7745 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7746 {
7747   PetscInt        cnt = -1, nidx, j;
7748   const PetscInt *idx;
7749 
7750   PetscFunctionBegin;
7751   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7752   if (nidx) {
7753     cnt     = 0;
7754     cc[cnt] = idx[0] / bs;
7755     for (j = 1; j < nidx; j++) {
7756       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7757     }
7758   }
7759   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7760   *n = cnt + 1;
7761   PetscFunctionReturn(PETSC_SUCCESS);
7762 }
7763 
7764 /*
7765     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7766 
7767     ncollapsed - the number of block indices
7768     collapsed - the block indices (must be large enough to contain the indices)
7769 */
7770 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7771 {
7772   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7773 
7774   PetscFunctionBegin;
7775   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7776   for (i = start + 1; i < start + bs; i++) {
7777     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7778     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7779     cprevtmp = cprev;
7780     cprev    = merged;
7781     merged   = cprevtmp;
7782   }
7783   *ncollapsed = nprev;
7784   if (collapsed) *collapsed = cprev;
7785   PetscFunctionReturn(PETSC_SUCCESS);
7786 }
7787 
7788 /*
7789  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7790 
7791  Input Parameter:
7792  . Amat - matrix
7793  - symmetrize - make the result symmetric
7794  + scale - scale with diagonal
7795 
7796  Output Parameter:
7797  . a_Gmat - output scalar graph >= 0
7798 
7799 */
7800 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat)
7801 {
7802   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7803   MPI_Comm  comm;
7804   Mat       Gmat;
7805   PetscBool ismpiaij, isseqaij;
7806   Mat       a, b, c;
7807   MatType   jtype;
7808 
7809   PetscFunctionBegin;
7810   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7811   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7812   PetscCall(MatGetSize(Amat, &MM, &NN));
7813   PetscCall(MatGetBlockSize(Amat, &bs));
7814   nloc = (Iend - Istart) / bs;
7815 
7816   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7817   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7818   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7819 
7820   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7821   /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
7822      implementation */
7823   if (bs > 1) {
7824     PetscCall(MatGetType(Amat, &jtype));
7825     PetscCall(MatCreate(comm, &Gmat));
7826     PetscCall(MatSetType(Gmat, jtype));
7827     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7828     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7829     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7830       PetscInt  *d_nnz, *o_nnz;
7831       MatScalar *aa, val, *AA;
7832       PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
7833 
7834       if (isseqaij) {
7835         a = Amat;
7836         b = NULL;
7837       } else {
7838         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7839         a             = d->A;
7840         b             = d->B;
7841       }
7842       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7843       PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
7844       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7845         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
7846         const PetscInt *cols1, *cols2;
7847 
7848         for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
7849           PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
7850           nnz[brow / bs] = nc2 / bs;
7851           if (nc2 % bs) ok = 0;
7852           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
7853           for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
7854             PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
7855             if (nc1 != nc2) ok = 0;
7856             else {
7857               for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
7858                 if (cols1[jj] != cols2[jj]) ok = 0;
7859                 if (cols1[jj] % bs != jj % bs) ok = 0;
7860               }
7861             }
7862             PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
7863           }
7864           PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
7865           if (!ok) {
7866             PetscCall(PetscFree2(d_nnz, o_nnz));
7867             PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
7868             goto old_bs;
7869           }
7870         }
7871       }
7872       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7873       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7874       PetscCall(PetscFree2(d_nnz, o_nnz));
7875       PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
7876       // diag
7877       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
7878         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
7879 
7880         ai = aseq->i;
7881         n  = ai[brow + 1] - ai[brow];
7882         aj = aseq->j + ai[brow];
7883         for (PetscInt k = 0; k < n; k += bs) {   // block columns
7884           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
7885           val        = 0;
7886           if (index_size == 0) {
7887             for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
7888               aa = aseq->a + ai[brow + ii] + k;
7889               for (PetscInt jj = 0; jj < bs; jj++) {    // columns in block
7890                 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7891               }
7892             }
7893           } else {                                            // use (index,index) value if provided
7894             for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
7895               PetscInt ii = index[iii];
7896               aa          = aseq->a + ai[brow + ii] + k;
7897               for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block
7898                 PetscInt jj = index[jjj];
7899                 val += PetscAbs(PetscRealPart(aa[jj]));
7900               }
7901             }
7902           }
7903           PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax);
7904           AA[k / bs] = val;
7905         }
7906         grow = Istart / bs + brow / bs;
7907         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES));
7908       }
7909       // off-diag
7910       if (ismpiaij) {
7911         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
7912         const PetscScalar *vals;
7913         const PetscInt    *cols, *garray = aij->garray;
7914 
7915         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
7916         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
7917           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
7918           for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
7919             PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
7920             AA[k / bs] = 0;
7921             AJ[cidx]   = garray[cols[k]] / bs;
7922           }
7923           nc = ncols / bs;
7924           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
7925           if (index_size == 0) {
7926             for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
7927               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7928               for (PetscInt k = 0; k < ncols; k += bs) {
7929                 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block
7930                   PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax);
7931                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7932                 }
7933               }
7934               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7935             }
7936           } else {                                            // use (index,index) value if provided
7937             for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
7938               PetscInt ii = index[iii];
7939               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7940               for (PetscInt k = 0; k < ncols; k += bs) {
7941                 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block
7942                   PetscInt jj = index[jjj];
7943                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7944                 }
7945               }
7946               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7947             }
7948           }
7949           grow = Istart / bs + brow / bs;
7950           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES));
7951         }
7952       }
7953       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7954       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7955       PetscCall(PetscFree2(AA, AJ));
7956     } else {
7957       const PetscScalar *vals;
7958       const PetscInt    *idx;
7959       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
7960     old_bs:
7961       /*
7962        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7963        */
7964       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
7965       PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
7966       if (isseqaij) {
7967         PetscInt max_d_nnz;
7968 
7969         /*
7970          Determine exact preallocation count for (sequential) scalar matrix
7971          */
7972         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
7973         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7974         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7975         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7976         PetscCall(PetscFree3(w0, w1, w2));
7977       } else if (ismpiaij) {
7978         Mat             Daij, Oaij;
7979         const PetscInt *garray;
7980         PetscInt        max_d_nnz;
7981 
7982         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
7983         /*
7984          Determine exact preallocation count for diagonal block portion of scalar matrix
7985          */
7986         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
7987         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7988         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7989         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7990         PetscCall(PetscFree3(w0, w1, w2));
7991         /*
7992          Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
7993          */
7994         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7995           o_nnz[jj] = 0;
7996           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
7997             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7998             o_nnz[jj] += ncols;
7999             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
8000           }
8001           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
8002         }
8003       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
8004       /* get scalar copy (norms) of matrix */
8005       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
8006       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
8007       PetscCall(PetscFree2(d_nnz, o_nnz));
8008       for (Ii = Istart; Ii < Iend; Ii++) {
8009         PetscInt dest_row = Ii / bs;
8010 
8011         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
8012         for (jj = 0; jj < ncols; jj++) {
8013           PetscInt    dest_col = idx[jj] / bs;
8014           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
8015 
8016           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
8017         }
8018         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
8019       }
8020       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
8021       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
8022     }
8023   } else {
8024     if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
8025     else {
8026       Gmat = Amat;
8027       PetscCall(PetscObjectReference((PetscObject)Gmat));
8028     }
8029     if (isseqaij) {
8030       a = Gmat;
8031       b = NULL;
8032     } else {
8033       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
8034       a             = d->A;
8035       b             = d->B;
8036     }
8037     if (filter >= 0 || scale) {
8038       /* take absolute value of each entry */
8039       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
8040         MatInfo      info;
8041         PetscScalar *avals;
8042 
8043         PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
8044         PetscCall(MatSeqAIJGetArray(c, &avals));
8045         for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
8046         PetscCall(MatSeqAIJRestoreArray(c, &avals));
8047       }
8048     }
8049   }
8050   if (symmetrize) {
8051     PetscBool isset, issym;
8052 
8053     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
8054     if (!isset || !issym) {
8055       Mat matTrans;
8056 
8057       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
8058       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
8059       PetscCall(MatDestroy(&matTrans));
8060     }
8061     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
8062   } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
8063   if (scale) {
8064     /* scale c for all diagonal values = 1 or -1 */
8065     Vec diag;
8066 
8067     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
8068     PetscCall(MatGetDiagonal(Gmat, diag));
8069     PetscCall(VecReciprocal(diag));
8070     PetscCall(VecSqrtAbs(diag));
8071     PetscCall(MatDiagonalScale(Gmat, diag, diag));
8072     PetscCall(VecDestroy(&diag));
8073   }
8074   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
8075   if (filter >= 0) {
8076     PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
8077     PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
8078   }
8079   *a_Gmat = Gmat;
8080   PetscFunctionReturn(PETSC_SUCCESS);
8081 }
8082 
8083 /*
8084     Special version for direct calls from Fortran
8085 */
8086 
8087 /* Change these macros so can be used in void function */
8088 /* Identical to PetscCallVoid, except it assigns to *_ierr */
8089 #undef PetscCall
8090 #define PetscCall(...) \
8091   do { \
8092     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
8093     if (PetscUnlikely(ierr_msv_mpiaij)) { \
8094       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
8095       return; \
8096     } \
8097   } while (0)
8098 
8099 #undef SETERRQ
8100 #define SETERRQ(comm, ierr, ...) \
8101   do { \
8102     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
8103     return; \
8104   } while (0)
8105 
/* Map the C symbol below to the platform's Fortran name-mangling convention;
   the default (underscore) case keeps the trailing underscore as written */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
  #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
  #define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
/* matsetvaluesmpiaij_ - Fortran-callable MatSetValues() for MATMPIAIJ, callable directly
   from Fortran. All scalar arguments arrive by reference (Fortran convention); errors are
   reported through *_ierr via the redefined PetscCall/SETERRQ macros above.

   The body mirrors MatSetValues_MPIAIJ: values whose column falls in the local diagonal
   range go to aij->A, other locally-owned columns go to aij->B (through the colmap), and
   rows owned by other processes are stashed for communication at assembly time. */
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat = *mmat;
  PetscInt    m = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    /* NOTE: the variable names below (rp1, ap1, low2, inserted, ...) are referenced by
       the MatSetValues_SeqAIJ_{A,B}_Private() macros and must not be renamed */
    Mat         A     = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    PetscBool   ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B                 = aij->B;
    Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue; /* negative row indices are silently ignored */
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* locally owned row: set up binary-search state for both the A and B rows */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* column in the diagonal block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* column in the off-diagonal block */
            if (mat->was_assembled) {
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) {
                /* new off-diagonal column: disassemble and fall back to global indexing */
                PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B        = aij->B;
                b        = (Mat_SeqAIJ *)B->data;
                bimax    = b->imax;
                bi       = b->i;
                bilen    = b->ilen;
                bj       = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* off-process row: stash for communication during assembly */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}
8225 
/* Undefine the Fortran-specific redefinitions made above. No other PETSc functions should
 * be defined past this point in this file, since the original macro definitions cannot be
 * recovered here */
#undef PetscCall
#undef SETERRQ
8231