xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision e4094ef18e7e53fda86cf35f3a47fda48a8e77d8)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
10 {
11   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
12 
13   PetscFunctionBegin;
14 #if defined(PETSC_USE_LOG)
15   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
16 #endif
17   PetscCall(MatStashDestroy_Private(&mat->stash));
18   PetscCall(VecDestroy(&aij->diag));
19   PetscCall(MatDestroy(&aij->A));
20   PetscCall(MatDestroy(&aij->B));
21 #if defined(PETSC_USE_CTABLE)
22   PetscCall(PetscHMapIDestroy(&aij->colmap));
23 #else
24   PetscCall(PetscFree(aij->colmap));
25 #endif
26   PetscCall(PetscFree(aij->garray));
27   PetscCall(VecDestroy(&aij->lvec));
28   PetscCall(VecScatterDestroy(&aij->Mvctx));
29   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
30   PetscCall(PetscFree(aij->ld));
31 
32   PetscCall(PetscFree(mat->data));
33 
34   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
35   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
36 
37   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
38   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
39   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
40   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
41   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
42   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
43   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
44   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
45   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
46   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
47 #if defined(PETSC_HAVE_CUDA)
48   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
49 #endif
50 #if defined(PETSC_HAVE_HIP)
51   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
52 #endif
53 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
54   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
55 #endif
56   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
57 #if defined(PETSC_HAVE_ELEMENTAL)
58   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
59 #endif
60 #if defined(PETSC_HAVE_SCALAPACK)
61   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
62 #endif
63 #if defined(PETSC_HAVE_HYPRE)
64   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
65   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
66 #endif
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
69   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
71   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
73 #if defined(PETSC_HAVE_MKL_SPARSE)
74   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
75 #endif
76   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
77   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
78   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
79   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
80   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
81   PetscFunctionReturn(PETSC_SUCCESS);
82 }
83 
84 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and  MatAssemblyEnd_MPI_Hash() */
85 #define TYPE AIJ
86 #define TYPE_AIJ
87 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
88 #undef TYPE
89 #undef TYPE_AIJ
90 
91 PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
92 {
93   Mat B;
94 
95   PetscFunctionBegin;
96   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
97   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
98   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
99   PetscCall(MatDestroy(&B));
100   PetscFunctionReturn(PETSC_SUCCESS);
101 }
102 
103 PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
104 {
105   Mat B;
106 
107   PetscFunctionBegin;
108   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
109   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
110   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
111   PetscFunctionReturn(PETSC_SUCCESS);
112 }
113 
/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`. The type also
   automatically switches over to use inodes when enough exist.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
M*/
134 
/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aijcrl - sets the matrix type to `MATAIJCRL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/
151 
152 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
153 {
154   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
155 
156   PetscFunctionBegin;
157 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
158   A->boundtocpu = flg;
159 #endif
160   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
161   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
162 
163   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
164    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
165    * to differ from the parent matrix. */
166   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
167   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
168 
169   PetscFunctionReturn(PETSC_SUCCESS);
170 }
171 
172 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
173 {
174   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
175 
176   PetscFunctionBegin;
177   if (mat->A) {
178     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
179     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
180   }
181   PetscFunctionReturn(PETSC_SUCCESS);
182 }
183 
184 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
185 {
186   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
187   Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
188   Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
189   const PetscInt  *ia, *ib;
190   const MatScalar *aa, *bb, *aav, *bav;
191   PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
192   PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;
193 
194   PetscFunctionBegin;
195   *keptrows = NULL;
196 
197   ia = a->i;
198   ib = b->i;
199   PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
200   PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
201   for (i = 0; i < m; i++) {
202     na = ia[i + 1] - ia[i];
203     nb = ib[i + 1] - ib[i];
204     if (!na && !nb) {
205       cnt++;
206       goto ok1;
207     }
208     aa = aav + ia[i];
209     for (j = 0; j < na; j++) {
210       if (aa[j] != 0.0) goto ok1;
211     }
212     bb = bav + ib[i];
213     for (j = 0; j < nb; j++) {
214       if (bb[j] != 0.0) goto ok1;
215     }
216     cnt++;
217   ok1:;
218   }
219   PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
220   if (!n0rows) {
221     PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
222     PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
223     PetscFunctionReturn(PETSC_SUCCESS);
224   }
225   PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
226   cnt = 0;
227   for (i = 0; i < m; i++) {
228     na = ia[i + 1] - ia[i];
229     nb = ib[i + 1] - ib[i];
230     if (!na && !nb) continue;
231     aa = aav + ia[i];
232     for (j = 0; j < na; j++) {
233       if (aa[j] != 0.0) {
234         rows[cnt++] = rstart + i;
235         goto ok2;
236       }
237     }
238     bb = bav + ib[i];
239     for (j = 0; j < nb; j++) {
240       if (bb[j] != 0.0) {
241         rows[cnt++] = rstart + i;
242         goto ok2;
243       }
244     }
245   ok2:;
246   }
247   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
248   PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
249   PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
250   PetscFunctionReturn(PETSC_SUCCESS);
251 }
252 
253 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
254 {
255   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
256   PetscBool   cong;
257 
258   PetscFunctionBegin;
259   PetscCall(MatHasCongruentLayouts(Y, &cong));
260   if (Y->assembled && cong) {
261     PetscCall(MatDiagonalSet(aij->A, D, is));
262   } else {
263     PetscCall(MatDiagonalSet_Default(Y, D, is));
264   }
265   PetscFunctionReturn(PETSC_SUCCESS);
266 }
267 
268 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
269 {
270   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
271   PetscInt    i, rstart, nrows, *rows;
272 
273   PetscFunctionBegin;
274   *zrows = NULL;
275   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
276   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
277   for (i = 0; i < nrows; i++) rows[i] += rstart;
278   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
279   PetscFunctionReturn(PETSC_SUCCESS);
280 }
281 
282 PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
283 {
284   Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
285   PetscInt           i, m, n, *garray = aij->garray;
286   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
287   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
288   PetscReal         *work;
289   const PetscScalar *dummy;
290 
291   PetscFunctionBegin;
292   PetscCall(MatGetSize(A, &m, &n));
293   PetscCall(PetscCalloc1(n, &work));
294   PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
295   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
296   PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
297   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
298   if (type == NORM_2) {
299     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
300     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
301   } else if (type == NORM_1) {
302     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
303     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
304   } else if (type == NORM_INFINITY) {
305     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
306     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
307   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
308     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
309     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
310   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
311     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
312     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
313   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
314   if (type == NORM_INFINITY) {
315     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
316   } else {
317     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
318   }
319   PetscCall(PetscFree(work));
320   if (type == NORM_2) {
321     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
322   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
323     for (i = 0; i < n; i++) reductions[i] /= m;
324   }
325   PetscFunctionReturn(PETSC_SUCCESS);
326 }
327 
328 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
329 {
330   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
331   IS              sis, gis;
332   const PetscInt *isis, *igis;
333   PetscInt        n, *iis, nsis, ngis, rstart, i;
334 
335   PetscFunctionBegin;
336   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
337   PetscCall(MatFindNonzeroRows(a->B, &gis));
338   PetscCall(ISGetSize(gis, &ngis));
339   PetscCall(ISGetSize(sis, &nsis));
340   PetscCall(ISGetIndices(sis, &isis));
341   PetscCall(ISGetIndices(gis, &igis));
342 
343   PetscCall(PetscMalloc1(ngis + nsis, &iis));
344   PetscCall(PetscArraycpy(iis, igis, ngis));
345   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
346   n = ngis + nsis;
347   PetscCall(PetscSortRemoveDupsInt(&n, iis));
348   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
349   for (i = 0; i < n; i++) iis[i] += rstart;
350   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
351 
352   PetscCall(ISRestoreIndices(sis, &isis));
353   PetscCall(ISRestoreIndices(gis, &igis));
354   PetscCall(ISDestroy(&sis));
355   PetscCall(ISDestroy(&gis));
356   PetscFunctionReturn(PETSC_SUCCESS);
357 }
358 
359 /*
360   Local utility routine that creates a mapping from the global column
361 number to the local number in the off-diagonal part of the local
362 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
363 a slightly higher hash table cost; without it it is not scalable (each processor
364 has an order N integer array but is fast to access.
365 */
366 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
367 {
368   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
369   PetscInt    n   = aij->B->cmap->n, i;
370 
371   PetscFunctionBegin;
372   PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
373 #if defined(PETSC_USE_CTABLE)
374   PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
375   for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
376 #else
377   PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
378   for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
379 #endif
380   PetscFunctionReturn(PETSC_SUCCESS);
381 }
382 
/*
  MatSetValues_SeqAIJ_A_Private - Inserts or adds one value into the row of the diagonal block that
  is currently cached in the caller's local variables.

  Arguments:
    row, col   - indices used for the search and the insertion
    value      - the value to store
    addv       - ADD_VALUES accumulates into an existing entry, anything else overwrites it
    orow, ocol - indices used only in the error message

  The macro relies on these names being in scope at the expansion site: A, a, am, aa, ai, aj, aimax,
  ailen, rp1, ap1, nrow1, rmax1, low1, high1, lastcol1, t, _i, N, nonew and ignorezeroentries, plus
  whatever MatSeqXAIJReallocateAIJ() itself expects. It defines the label a_noinsert, so it can be
  expanded at most once per function.

  The search window [low1, high1) is carried over from the previous expansion and narrowed by
  bisection, followed by a short linear scan. If the column is not found, the value is inserted
  unless it is an ignorable zero or nonew forbids new nonzeros.

  Wrapped in do { ... } while (0) so that the expansion followed by a semicolon is exactly one statement.
*/
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure whether LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)
428 
/*
  MatSetValues_SeqAIJ_B_Private - Inserts or adds one value into the row of the off-diagonal block
  that is currently cached in the caller's local variables.

  Arguments:
    row, col   - indices used for the search and the insertion
    value      - the value to store
    addv       - ADD_VALUES accumulates into an existing entry, anything else overwrites it
    orow, ocol - indices used only in the error message

  The macro relies on these names being in scope at the expansion site: B, b, bm, ba, bi, bj, bimax,
  bilen, rp2, ap2, nrow2, rmax2, low2, high2, lastcol2, t, _i, N, nonew and ignorezeroentries, plus
  whatever MatSeqXAIJReallocateAIJ() itself expects. It defines the label b_noinsert, so it can be
  expanded at most once per function.

  Unlike the diagonal-block variant, an ignorable zero is skipped without a row != col exception.

  Wrapped in do { ... } while (0) so that the expansion followed by a semicolon is exactly one statement.
*/
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  } while (0)
473 
474 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
475 {
476   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
477   Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
478   PetscInt     l, *garray                         = mat->garray, diag;
479   PetscScalar *aa, *ba;
480 
481   PetscFunctionBegin;
482   /* code only works for square matrices A */
483 
484   /* find size of row to the left of the diagonal part */
485   PetscCall(MatGetOwnershipRange(A, &diag, NULL));
486   row = row - diag;
487   for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
488     if (garray[b->j[b->i[row] + l]] > diag) break;
489   }
490   if (l) {
491     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
492     PetscCall(PetscArraycpy(ba + b->i[row], v, l));
493     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
494   }
495 
496   /* diagonal part */
497   if (a->i[row + 1] - a->i[row]) {
498     PetscCall(MatSeqAIJGetArray(mat->A, &aa));
499     PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
500     PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
501   }
502 
503   /* right of diagonal part */
504   if (b->i[row + 1] - b->i[row] - l) {
505     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
506     PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
507     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
508   }
509   PetscFunctionReturn(PETSC_SUCCESS);
510 }
511 
512 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
513 {
514   Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
515   PetscScalar value = 0.0;
516   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
517   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
518   PetscBool   roworiented = aij->roworiented;
519 
520   /* Some Variables required in the macro */
521   Mat         A     = aij->A;
522   Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
523   PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
524   PetscBool   ignorezeroentries = a->ignorezeroentries;
525   Mat         B                 = aij->B;
526   Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
527   PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
528   MatScalar  *aa, *ba;
529   PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
530   PetscInt    nonew;
531   MatScalar  *ap1, *ap2;
532 
533   PetscFunctionBegin;
534   PetscCall(MatSeqAIJGetArray(A, &aa));
535   PetscCall(MatSeqAIJGetArray(B, &ba));
536   for (i = 0; i < m; i++) {
537     if (im[i] < 0) continue;
538     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
539     if (im[i] >= rstart && im[i] < rend) {
540       row      = im[i] - rstart;
541       lastcol1 = -1;
542       rp1      = aj + ai[row];
543       ap1      = aa + ai[row];
544       rmax1    = aimax[row];
545       nrow1    = ailen[row];
546       low1     = 0;
547       high1    = nrow1;
548       lastcol2 = -1;
549       rp2      = bj + bi[row];
550       ap2      = ba + bi[row];
551       rmax2    = bimax[row];
552       nrow2    = bilen[row];
553       low2     = 0;
554       high2    = nrow2;
555 
556       for (j = 0; j < n; j++) {
557         if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
558         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
559         if (in[j] >= cstart && in[j] < cend) {
560           col   = in[j] - cstart;
561           nonew = a->nonew;
562           MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
563         } else if (in[j] < 0) {
564           continue;
565         } else {
566           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
567           if (mat->was_assembled) {
568             if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
569 #if defined(PETSC_USE_CTABLE)
570             PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
571             col--;
572 #else
573             col = aij->colmap[in[j]] - 1;
574 #endif
575             if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
576               PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
577               col = in[j];
578               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
579               B     = aij->B;
580               b     = (Mat_SeqAIJ *)B->data;
581               bimax = b->imax;
582               bi    = b->i;
583               bilen = b->ilen;
584               bj    = b->j;
585               ba    = b->a;
586               rp2   = bj + bi[row];
587               ap2   = ba + bi[row];
588               rmax2 = bimax[row];
589               nrow2 = bilen[row];
590               low2  = 0;
591               high2 = nrow2;
592               bm    = aij->B->rmap->n;
593               ba    = b->a;
594             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
595               if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
596                 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
597               } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
598             }
599           } else col = in[j];
600           nonew = b->nonew;
601           MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
602         }
603       }
604     } else {
605       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
606       if (!aij->donotstash) {
607         mat->assembled = PETSC_FALSE;
608         if (roworiented) {
609           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
610         } else {
611           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
612         }
613       }
614     }
615   }
616   PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
617   PetscCall(MatSeqAIJRestoreArray(B, &ba));
618   PetscFunctionReturn(PETSC_SUCCESS);
619 }
620 
621 /*
622     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
623     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
624     No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
625 */
626 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
627 {
628   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
629   Mat         A      = aij->A; /* diagonal part of the matrix */
630   Mat         B      = aij->B; /* offdiagonal part of the matrix */
631   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
632   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
633   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
634   PetscInt   *ailen = a->ilen, *aj = a->j;
635   PetscInt   *bilen = b->ilen, *bj = b->j;
636   PetscInt    am          = aij->A->rmap->n, j;
637   PetscInt    diag_so_far = 0, dnz;
638   PetscInt    offd_so_far = 0, onz;
639 
640   PetscFunctionBegin;
641   /* Iterate over all rows of the matrix */
642   for (j = 0; j < am; j++) {
643     dnz = onz = 0;
644     /*  Iterate over all non-zero columns of the current row */
645     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
646       /* If column is in the diagonal */
647       if (mat_j[col] >= cstart && mat_j[col] < cend) {
648         aj[diag_so_far++] = mat_j[col] - cstart;
649         dnz++;
650       } else { /* off-diagonal entries */
651         bj[offd_so_far++] = mat_j[col];
652         onz++;
653       }
654     }
655     ailen[j] = dnz;
656     bilen[j] = onz;
657   }
658   PetscFunctionReturn(PETSC_SUCCESS);
659 }
660 
661 /*
662     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
663     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
664     No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
665     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
666     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
667 */
668 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
669 {
670   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
671   Mat          A    = aij->A; /* diagonal part of the matrix */
672   Mat          B    = aij->B; /* offdiagonal part of the matrix */
673   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
674   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
675   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
676   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
677   PetscInt    *ailen = a->ilen, *aj = a->j;
678   PetscInt    *bilen = b->ilen, *bj = b->j;
679   PetscInt     am          = aij->A->rmap->n, j;
680   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
681   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
682   PetscScalar *aa = a->a, *ba = b->a;
683 
684   PetscFunctionBegin;
685   /* Iterate over all rows of the matrix */
686   for (j = 0; j < am; j++) {
687     dnz_row = onz_row = 0;
688     rowstart_offd     = full_offd_i[j];
689     rowstart_diag     = full_diag_i[j];
690     /*  Iterate over all non-zero columns of the current row */
691     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
692       /* If column is in the diagonal */
693       if (mat_j[col] >= cstart && mat_j[col] < cend) {
694         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
695         aa[rowstart_diag + dnz_row] = mat_a[col];
696         dnz_row++;
697       } else { /* off-diagonal entries */
698         bj[rowstart_offd + onz_row] = mat_j[col];
699         ba[rowstart_offd + onz_row] = mat_a[col];
700         onz_row++;
701       }
702     }
703     ailen[j] = dnz_row;
704     bilen[j] = onz_row;
705   }
706   PetscFunctionReturn(PETSC_SUCCESS);
707 }
708 
709 PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
710 {
711   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
712   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
713   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
714 
715   PetscFunctionBegin;
716   for (i = 0; i < m; i++) {
717     if (idxm[i] < 0) continue; /* negative row */
718     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
719     if (idxm[i] >= rstart && idxm[i] < rend) {
720       row = idxm[i] - rstart;
721       for (j = 0; j < n; j++) {
722         if (idxn[j] < 0) continue; /* negative column */
723         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
724         if (idxn[j] >= cstart && idxn[j] < cend) {
725           col = idxn[j] - cstart;
726           PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
727         } else {
728           if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
729 #if defined(PETSC_USE_CTABLE)
730           PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
731           col--;
732 #else
733           col = aij->colmap[idxn[j]] - 1;
734 #endif
735           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
736           else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
737         }
738       }
739     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
740   }
741   PetscFunctionReturn(PETSC_SUCCESS);
742 }
743 
744 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
745 {
746   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
747   PetscInt    nstash, reallocs;
748 
749   PetscFunctionBegin;
750   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
751 
752   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
753   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
754   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
755   PetscFunctionReturn(PETSC_SUCCESS);
756 }
757 
/*
  MatAssemblyEnd_MPIAIJ - Completes assembly of an MPIAIJ matrix.

  Steps, in order:
    1. Unless stashing is disabled, receive the stashed messages and insert their entries with
       MatSetValues_MPIAIJ, one call per run of consecutive entries sharing the same row.
    2. Assemble the diagonal block aij->A.
    3. If new nonzeros are allowed in aij->B, find out (all-reduce with MPI_LAND over
       mat->was_assembled) whether every rank was assembled; if this rank was but not all ranks
       were, call MatDisAssemble_MPIAIJ on it.
    4. On the first final assembly call MatSetUpMultiply_MPIAIJ, then assemble aij->B.
    5. Release the row work arrays and the cached diagonal vector, and refresh mat->nonzerostate
       as the global sum of the block nonzero states when the condition at the end holds.
*/
758 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
759 {
760   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
761   PetscMPIInt  n;
762   PetscInt     i, j, rstart, ncols, flg;
763   PetscInt    *row, *col;
764   PetscBool    other_disassembled;
765   PetscScalar *val;
766 
767   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
768 
769   PetscFunctionBegin;
770   if (!aij->donotstash && !mat->nooffprocentries) {
      /* loop until the stash reports (flg == 0) that no further message is available */
771     while (1) {
772       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
773       if (!flg) break;
774 
775       for (i = 0; i < n;) {
776         /* Now identify the consecutive vals belonging to the same row */
          /* note: 'rstart' here holds the row number of the current run, not an ownership offset */
777         for (j = i, rstart = row[j]; j < n; j++) {
778           if (row[j] != rstart) break;
779         }
          /* both branches evaluate to j - i, because j == n whenever the else branch is taken */
780         if (j < n) ncols = j - i;
781         else ncols = n - i;
782         /* Now assemble all these values with a single function call */
783         PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
784         i = j;
785       }
786     }
787     PetscCall(MatStashScatterEnd_Private(&mat->stash));
788   }
789 #if defined(PETSC_HAVE_DEVICE)
790   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
791   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
792   if (mat->boundtocpu) {
793     PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
794     PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
795   }
796 #endif
797   PetscCall(MatAssemblyBegin(aij->A, mode));
798   PetscCall(MatAssemblyEnd(aij->A, mode));
799 
800   /* determine if any processor has disassembled, if so we must
801      also disassemble ourself, in order that we may reassemble. */
802   /*
803      if nonzero structure of submatrix B cannot change then we know that
804      no processor disassembled thus we can skip this stuff
805   */
806   if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
      /* other_disassembled is the logical AND of was_assembled over all ranks: false means some rank is not assembled */
807     PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
808     if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
809       PetscCall(MatDisAssemble_MPIAIJ(mat));
810     }
811   }
    /* only on the first final assembly (was_assembled still false) */
812   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
813   PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
814 #if defined(PETSC_HAVE_DEVICE)
815   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
816 #endif
817   PetscCall(MatAssemblyBegin(aij->B, mode));
818   PetscCall(MatAssemblyEnd(aij->B, mode));
819 
    /* drop the row work arrays */
820   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
821 
822   aij->rowvalues = NULL;
823 
    /* drop the cached diagonal vector */
824   PetscCall(VecDestroy(&aij->diag));
825 
826   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
827   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
      /* global nonzero state = sum over all ranks of the two local block states */
828     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
829     PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
830   }
831 #if defined(PETSC_HAVE_DEVICE)
832   mat->offloadmask = PETSC_OFFLOAD_BOTH;
833 #endif
834   PetscFunctionReturn(PETSC_SUCCESS);
835 }
836 
837 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
838 {
839   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
840 
841   PetscFunctionBegin;
842   PetscCall(MatZeroEntries(l->A));
843   PetscCall(MatZeroEntries(l->B));
844   PetscFunctionReturn(PETSC_SUCCESS);
845 }
846 
847 PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
848 {
849   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)A->data;
850   PetscObjectState sA, sB;
851   PetscInt        *lrows;
852   PetscInt         r, len;
853   PetscBool        cong, lch, gch;
854 
855   PetscFunctionBegin;
856   /* get locally owned rows */
857   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
858   PetscCall(MatHasCongruentLayouts(A, &cong));
859   /* fix right hand side if needed */
860   if (x && b) {
861     const PetscScalar *xx;
862     PetscScalar       *bb;
863 
864     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
865     PetscCall(VecGetArrayRead(x, &xx));
866     PetscCall(VecGetArray(b, &bb));
867     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
868     PetscCall(VecRestoreArrayRead(x, &xx));
869     PetscCall(VecRestoreArray(b, &bb));
870   }
871 
872   sA = mat->A->nonzerostate;
873   sB = mat->B->nonzerostate;
874 
875   if (diag != 0.0 && cong) {
876     PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
877     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
878   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
879     Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
880     Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
881     PetscInt    nnwA, nnwB;
882     PetscBool   nnzA, nnzB;
883 
884     nnwA = aijA->nonew;
885     nnwB = aijB->nonew;
886     nnzA = aijA->keepnonzeropattern;
887     nnzB = aijB->keepnonzeropattern;
888     if (!nnzA) {
889       PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
890       aijA->nonew = 0;
891     }
892     if (!nnzB) {
893       PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
894       aijB->nonew = 0;
895     }
896     /* Must zero here before the next loop */
897     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
898     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
899     for (r = 0; r < len; ++r) {
900       const PetscInt row = lrows[r] + A->rmap->rstart;
901       if (row >= A->cmap->N) continue;
902       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
903     }
904     aijA->nonew = nnwA;
905     aijB->nonew = nnwB;
906   } else {
907     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
908     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
909   }
910   PetscCall(PetscFree(lrows));
911   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
912   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
913 
914   /* reduce nonzerostate */
915   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
916   PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
917   if (gch) A->nonzerostate++;
918   PetscFunctionReturn(PETSC_SUCCESS);
919 }
920 
921 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
922 {
923   Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
924   PetscMPIInt        n = A->rmap->n;
925   PetscInt           i, j, r, m, len = 0;
926   PetscInt          *lrows, *owners = A->rmap->range;
927   PetscMPIInt        p = 0;
928   PetscSFNode       *rrows;
929   PetscSF            sf;
930   const PetscScalar *xx;
931   PetscScalar       *bb, *mask, *aij_a;
932   Vec                xmask, lmask;
933   Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
934   const PetscInt    *aj, *ii, *ridx;
935   PetscScalar       *aa;
936 
937   PetscFunctionBegin;
938   /* Create SF where leaves are input rows and roots are owned rows */
939   PetscCall(PetscMalloc1(n, &lrows));
940   for (r = 0; r < n; ++r) lrows[r] = -1;
941   PetscCall(PetscMalloc1(N, &rrows));
942   for (r = 0; r < N; ++r) {
943     const PetscInt idx = rows[r];
944     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
945     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
946       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
947     }
948     rrows[r].rank  = p;
949     rrows[r].index = rows[r] - owners[p];
950   }
951   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
952   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
953   /* Collect flags for rows to be zeroed */
954   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
955   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
956   PetscCall(PetscSFDestroy(&sf));
957   /* Compress and put in row numbers */
958   for (r = 0; r < n; ++r)
959     if (lrows[r] >= 0) lrows[len++] = r;
960   /* zero diagonal part of matrix */
961   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
962   /* handle off diagonal part of matrix */
963   PetscCall(MatCreateVecs(A, &xmask, NULL));
964   PetscCall(VecDuplicate(l->lvec, &lmask));
965   PetscCall(VecGetArray(xmask, &bb));
966   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
967   PetscCall(VecRestoreArray(xmask, &bb));
968   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
969   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
970   PetscCall(VecDestroy(&xmask));
971   if (x && b) { /* this code is buggy when the row and column layout don't match */
972     PetscBool cong;
973 
974     PetscCall(MatHasCongruentLayouts(A, &cong));
975     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
976     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
977     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
978     PetscCall(VecGetArrayRead(l->lvec, &xx));
979     PetscCall(VecGetArray(b, &bb));
980   }
981   PetscCall(VecGetArray(lmask, &mask));
982   /* remove zeroed rows of off diagonal matrix */
983   PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
984   ii = aij->i;
985   for (i = 0; i < len; i++) PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - ii[lrows[i]]));
986   /* loop over all elements of off process part of matrix zeroing removed columns*/
987   if (aij->compressedrow.use) {
988     m    = aij->compressedrow.nrows;
989     ii   = aij->compressedrow.i;
990     ridx = aij->compressedrow.rindex;
991     for (i = 0; i < m; i++) {
992       n  = ii[i + 1] - ii[i];
993       aj = aij->j + ii[i];
994       aa = aij_a + ii[i];
995 
996       for (j = 0; j < n; j++) {
997         if (PetscAbsScalar(mask[*aj])) {
998           if (b) bb[*ridx] -= *aa * xx[*aj];
999           *aa = 0.0;
1000         }
1001         aa++;
1002         aj++;
1003       }
1004       ridx++;
1005     }
1006   } else { /* do not use compressed row format */
1007     m = l->B->rmap->n;
1008     for (i = 0; i < m; i++) {
1009       n  = ii[i + 1] - ii[i];
1010       aj = aij->j + ii[i];
1011       aa = aij_a + ii[i];
1012       for (j = 0; j < n; j++) {
1013         if (PetscAbsScalar(mask[*aj])) {
1014           if (b) bb[i] -= *aa * xx[*aj];
1015           *aa = 0.0;
1016         }
1017         aa++;
1018         aj++;
1019       }
1020     }
1021   }
1022   if (x && b) {
1023     PetscCall(VecRestoreArray(b, &bb));
1024     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1025   }
1026   PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
1027   PetscCall(VecRestoreArray(lmask, &mask));
1028   PetscCall(VecDestroy(&lmask));
1029   PetscCall(PetscFree(lrows));
1030 
1031   /* only change matrix nonzero state if pattern was allowed to be changed */
1032   if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) {
1033     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1034     PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1035   }
1036   PetscFunctionReturn(PETSC_SUCCESS);
1037 }
1038 
1039 PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1040 {
1041   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1042   PetscInt    nt;
1043   VecScatter  Mvctx = a->Mvctx;
1044 
1045   PetscFunctionBegin;
1046   PetscCall(VecGetLocalSize(xx, &nt));
1047   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
1048   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1049   PetscUseTypeMethod(a->A, mult, xx, yy);
1050   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1051   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1052   PetscFunctionReturn(PETSC_SUCCESS);
1053 }
1054 
1055 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1056 {
1057   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1058 
1059   PetscFunctionBegin;
1060   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1061   PetscFunctionReturn(PETSC_SUCCESS);
1062 }
1063 
1064 PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1065 {
1066   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1067   VecScatter  Mvctx = a->Mvctx;
1068 
1069   PetscFunctionBegin;
1070   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1071   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1072   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1073   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1074   PetscFunctionReturn(PETSC_SUCCESS);
1075 }
1076 
1077 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1078 {
1079   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1080 
1081   PetscFunctionBegin;
1082   /* do nondiagonal part */
1083   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1084   /* do local part */
1085   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1086   /* add partial results together */
1087   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1088   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1089   PetscFunctionReturn(PETSC_SUCCESS);
1090 }
1091 
1092 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
1093 {
1094   MPI_Comm    comm;
1095   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
1096   Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
1097   IS          Me, Notme;
1098   PetscInt    M, N, first, last, *notme, i;
1099   PetscBool   lf;
1100   PetscMPIInt size;
1101 
1102   PetscFunctionBegin;
1103   /* Easy test: symmetric diagonal block */
1104   PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
1105   PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
1106   if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
1107   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
1108   PetscCallMPI(MPI_Comm_size(comm, &size));
1109   if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);
1110 
1111   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1112   PetscCall(MatGetSize(Amat, &M, &N));
1113   PetscCall(MatGetOwnershipRange(Amat, &first, &last));
1114   PetscCall(PetscMalloc1(N - last + first, &notme));
1115   for (i = 0; i < first; i++) notme[i] = i;
1116   for (i = last; i < M; i++) notme[i - last + first] = i;
1117   PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
1118   PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
1119   PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
1120   Aoff = Aoffs[0];
1121   PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
1122   Boff = Boffs[0];
1123   PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
1124   PetscCall(MatDestroyMatrices(1, &Aoffs));
1125   PetscCall(MatDestroyMatrices(1, &Boffs));
1126   PetscCall(ISDestroy(&Me));
1127   PetscCall(ISDestroy(&Notme));
1128   PetscCall(PetscFree(notme));
1129   PetscFunctionReturn(PETSC_SUCCESS);
1130 }
1131 
1132 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f)
1133 {
1134   PetscFunctionBegin;
1135   PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f));
1136   PetscFunctionReturn(PETSC_SUCCESS);
1137 }
1138 
1139 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1140 {
1141   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1142 
1143   PetscFunctionBegin;
1144   /* do nondiagonal part */
1145   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1146   /* do local part */
1147   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1148   /* add partial results together */
1149   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1150   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1151   PetscFunctionReturn(PETSC_SUCCESS);
1152 }
1153 
1154 /*
1155   This only works correctly for square matrices where the subblock A->A is the
1156    diagonal block
1157 */
1158 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1159 {
1160   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1161 
1162   PetscFunctionBegin;
1163   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1164   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1165   PetscCall(MatGetDiagonal(a->A, v));
1166   PetscFunctionReturn(PETSC_SUCCESS);
1167 }
1168 
1169 PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1170 {
1171   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1172 
1173   PetscFunctionBegin;
1174   PetscCall(MatScale(a->A, aa));
1175   PetscCall(MatScale(a->B, aa));
1176   PetscFunctionReturn(PETSC_SUCCESS);
1177 }
1178 
1179 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1180 {
1181   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1182   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1183   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1184   const PetscInt    *garray = aij->garray;
1185   const PetscScalar *aa, *ba;
1186   PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
1187   PetscInt64         nz, hnz;
1188   PetscInt          *rowlens;
1189   PetscInt          *colidxs;
1190   PetscScalar       *matvals;
1191   PetscMPIInt        rank;
1192 
1193   PetscFunctionBegin;
1194   PetscCall(PetscViewerSetUp(viewer));
1195 
1196   M  = mat->rmap->N;
1197   N  = mat->cmap->N;
1198   m  = mat->rmap->n;
1199   rs = mat->rmap->rstart;
1200   cs = mat->cmap->rstart;
1201   nz = A->nz + B->nz;
1202 
1203   /* write matrix header */
1204   header[0] = MAT_FILE_CLASSID;
1205   header[1] = M;
1206   header[2] = N;
1207   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1208   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1209   if (rank == 0) {
1210     if (hnz > PETSC_MAX_INT) header[3] = PETSC_MAX_INT;
1211     else header[3] = (PetscInt)hnz;
1212   }
1213   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1214 
1215   /* fill in and store row lengths  */
1216   PetscCall(PetscMalloc1(m, &rowlens));
1217   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1218   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1219   PetscCall(PetscFree(rowlens));
1220 
1221   /* fill in and store column indices */
1222   PetscCall(PetscMalloc1(nz, &colidxs));
1223   for (cnt = 0, i = 0; i < m; i++) {
1224     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1225       if (garray[B->j[jb]] > cs) break;
1226       colidxs[cnt++] = garray[B->j[jb]];
1227     }
1228     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1229     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1230   }
1231   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1232   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1233   PetscCall(PetscFree(colidxs));
1234 
1235   /* fill in and store nonzero values */
1236   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1237   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1238   PetscCall(PetscMalloc1(nz, &matvals));
1239   for (cnt = 0, i = 0; i < m; i++) {
1240     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1241       if (garray[B->j[jb]] > cs) break;
1242       matvals[cnt++] = ba[jb];
1243     }
1244     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1245     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1246   }
1247   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1248   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1249   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1250   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1251   PetscCall(PetscFree(matvals));
1252 
1253   /* write block size option to the viewer's .info file */
1254   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1255   PetscFunctionReturn(PETSC_SUCCESS);
1256 }
1257 
1258 #include <petscdraw.h>
1259 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1260 {
1261   Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
1262   PetscMPIInt       rank = aij->rank, size = aij->size;
1263   PetscBool         isdraw, iascii, isbinary;
1264   PetscViewer       sviewer;
1265   PetscViewerFormat format;
1266 
1267   PetscFunctionBegin;
1268   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1269   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1270   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1271   if (iascii) {
1272     PetscCall(PetscViewerGetFormat(viewer, &format));
1273     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1274       PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz;
1275       PetscCall(PetscMalloc1(size, &nz));
1276       PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
1277       for (i = 0; i < (PetscInt)size; i++) {
1278         nmax = PetscMax(nmax, nz[i]);
1279         nmin = PetscMin(nmin, nz[i]);
1280         navg += nz[i];
1281       }
1282       PetscCall(PetscFree(nz));
1283       navg = navg / size;
1284       PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
1285       PetscFunctionReturn(PETSC_SUCCESS);
1286     }
1287     PetscCall(PetscViewerGetFormat(viewer, &format));
1288     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1289       MatInfo   info;
1290       PetscInt *inodes = NULL;
1291 
1292       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1293       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
1294       PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
1295       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1296       if (!inodes) {
1297         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1298                                                      (double)info.memory));
1299       } else {
1300         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1301                                                      (double)info.memory));
1302       }
1303       PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
1304       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1305       PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
1306       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1307       PetscCall(PetscViewerFlush(viewer));
1308       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1309       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
1310       PetscCall(VecScatterView(aij->Mvctx, viewer));
1311       PetscFunctionReturn(PETSC_SUCCESS);
1312     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1313       PetscInt inodecount, inodelimit, *inodes;
1314       PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
1315       if (inodes) {
1316         PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
1317       } else {
1318         PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
1319       }
1320       PetscFunctionReturn(PETSC_SUCCESS);
1321     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1322       PetscFunctionReturn(PETSC_SUCCESS);
1323     }
1324   } else if (isbinary) {
1325     if (size == 1) {
1326       PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1327       PetscCall(MatView(aij->A, viewer));
1328     } else {
1329       PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
1330     }
1331     PetscFunctionReturn(PETSC_SUCCESS);
1332   } else if (iascii && size == 1) {
1333     PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1334     PetscCall(MatView(aij->A, viewer));
1335     PetscFunctionReturn(PETSC_SUCCESS);
1336   } else if (isdraw) {
1337     PetscDraw draw;
1338     PetscBool isnull;
1339     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1340     PetscCall(PetscDrawIsNull(draw, &isnull));
1341     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1342   }
1343 
1344   { /* assemble the entire matrix onto first processor */
1345     Mat A = NULL, Av;
1346     IS  isrow, iscol;
1347 
1348     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
1349     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
1350     PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
1351     PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
1352     /*  The commented code uses MatCreateSubMatrices instead */
1353     /*
1354     Mat *AA, A = NULL, Av;
1355     IS  isrow,iscol;
1356 
1357     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
1358     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
1359     PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
1360     if (rank == 0) {
1361        PetscCall(PetscObjectReference((PetscObject)AA[0]));
1362        A    = AA[0];
1363        Av   = AA[0];
1364     }
1365     PetscCall(MatDestroySubMatrices(1,&AA));
1366 */
1367     PetscCall(ISDestroy(&iscol));
1368     PetscCall(ISDestroy(&isrow));
1369     /*
1370        Everyone has to call to draw the matrix since the graphics waits are
1371        synchronized across all processors that share the PetscDraw object
1372     */
1373     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1374     if (rank == 0) {
1375       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
1376       PetscCall(MatView_SeqAIJ(Av, sviewer));
1377     }
1378     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1379     PetscCall(PetscViewerFlush(viewer));
1380     PetscCall(MatDestroy(&A));
1381   }
1382   PetscFunctionReturn(PETSC_SUCCESS);
1383 }
1384 
1385 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1386 {
1387   PetscBool iascii, isdraw, issocket, isbinary;
1388 
1389   PetscFunctionBegin;
1390   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1391   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1392   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1393   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1394   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1395   PetscFunctionReturn(PETSC_SUCCESS);
1396 }
1397 
1398 PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
1399 {
1400   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
1401   Vec         bb1 = NULL;
1402   PetscBool   hasop;
1403 
1404   PetscFunctionBegin;
1405   if (flag == SOR_APPLY_UPPER) {
1406     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1407     PetscFunctionReturn(PETSC_SUCCESS);
1408   }
1409 
1410   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));
1411 
1412   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1413     if (flag & SOR_ZERO_INITIAL_GUESS) {
1414       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1415       its--;
1416     }
1417 
1418     while (its--) {
1419       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1420       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1421 
1422       /* update rhs: bb1 = bb - B*x */
1423       PetscCall(VecScale(mat->lvec, -1.0));
1424       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1425 
1426       /* local sweep */
1427       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
1428     }
1429   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1430     if (flag & SOR_ZERO_INITIAL_GUESS) {
1431       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1432       its--;
1433     }
1434     while (its--) {
1435       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1436       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1437 
1438       /* update rhs: bb1 = bb - B*x */
1439       PetscCall(VecScale(mat->lvec, -1.0));
1440       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1441 
1442       /* local sweep */
1443       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
1444     }
1445   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1446     if (flag & SOR_ZERO_INITIAL_GUESS) {
1447       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1448       its--;
1449     }
1450     while (its--) {
1451       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1452       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1453 
1454       /* update rhs: bb1 = bb - B*x */
1455       PetscCall(VecScale(mat->lvec, -1.0));
1456       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1457 
1458       /* local sweep */
1459       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
1460     }
1461   } else if (flag & SOR_EISENSTAT) {
1462     Vec xx1;
1463 
1464     PetscCall(VecDuplicate(bb, &xx1));
1465     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
1466 
1467     PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1468     PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1469     if (!mat->diag) {
1470       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
1471       PetscCall(MatGetDiagonal(matin, mat->diag));
1472     }
1473     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
1474     if (hasop) {
1475       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
1476     } else {
1477       PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
1478     }
1479     PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));
1480 
1481     PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));
1482 
1483     /* local sweep */
1484     PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
1485     PetscCall(VecAXPY(xx, 1.0, xx1));
1486     PetscCall(VecDestroy(&xx1));
1487   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");
1488 
1489   PetscCall(VecDestroy(&bb1));
1490 
1491   matin->factorerrortype = mat->A->factorerrortype;
1492   PetscFunctionReturn(PETSC_SUCCESS);
1493 }
1494 
1495 PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
1496 {
1497   Mat             aA, aB, Aperm;
1498   const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
1499   PetscScalar    *aa, *ba;
1500   PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
1501   PetscSF         rowsf, sf;
1502   IS              parcolp = NULL;
1503   PetscBool       done;
1504 
1505   PetscFunctionBegin;
1506   PetscCall(MatGetLocalSize(A, &m, &n));
1507   PetscCall(ISGetIndices(rowp, &rwant));
1508   PetscCall(ISGetIndices(colp, &cwant));
1509   PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));
1510 
1511   /* Invert row permutation to find out where my rows should go */
1512   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
1513   PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
1514   PetscCall(PetscSFSetFromOptions(rowsf));
1515   for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
1516   PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1517   PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1518 
1519   /* Invert column permutation to find out where my columns should go */
1520   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1521   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
1522   PetscCall(PetscSFSetFromOptions(sf));
1523   for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
1524   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1525   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1526   PetscCall(PetscSFDestroy(&sf));
1527 
1528   PetscCall(ISRestoreIndices(rowp, &rwant));
1529   PetscCall(ISRestoreIndices(colp, &cwant));
1530   PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));
1531 
1532   /* Find out where my gcols should go */
1533   PetscCall(MatGetSize(aB, NULL, &ng));
1534   PetscCall(PetscMalloc1(ng, &gcdest));
1535   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1536   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
1537   PetscCall(PetscSFSetFromOptions(sf));
1538   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1539   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1540   PetscCall(PetscSFDestroy(&sf));
1541 
1542   PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
1543   PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1544   PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1545   for (i = 0; i < m; i++) {
1546     PetscInt    row = rdest[i];
1547     PetscMPIInt rowner;
1548     PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
1549     for (j = ai[i]; j < ai[i + 1]; j++) {
1550       PetscInt    col = cdest[aj[j]];
1551       PetscMPIInt cowner;
1552       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
1553       if (rowner == cowner) dnnz[i]++;
1554       else onnz[i]++;
1555     }
1556     for (j = bi[i]; j < bi[i + 1]; j++) {
1557       PetscInt    col = gcdest[bj[j]];
1558       PetscMPIInt cowner;
1559       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
1560       if (rowner == cowner) dnnz[i]++;
1561       else onnz[i]++;
1562     }
1563   }
1564   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1565   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1566   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1567   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1568   PetscCall(PetscSFDestroy(&rowsf));
1569 
1570   PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
1571   PetscCall(MatSeqAIJGetArray(aA, &aa));
1572   PetscCall(MatSeqAIJGetArray(aB, &ba));
1573   for (i = 0; i < m; i++) {
1574     PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
1575     PetscInt  j0, rowlen;
1576     rowlen = ai[i + 1] - ai[i];
1577     for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
1578       for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
1579       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
1580     }
1581     rowlen = bi[i + 1] - bi[i];
1582     for (j0 = j = 0; j < rowlen; j0 = j) {
1583       for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
1584       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
1585     }
1586   }
1587   PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
1588   PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
1589   PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1590   PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1591   PetscCall(MatSeqAIJRestoreArray(aA, &aa));
1592   PetscCall(MatSeqAIJRestoreArray(aB, &ba));
1593   PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
1594   PetscCall(PetscFree3(work, rdest, cdest));
1595   PetscCall(PetscFree(gcdest));
1596   if (parcolp) PetscCall(ISDestroy(&colp));
1597   *B = Aperm;
1598   PetscFunctionReturn(PETSC_SUCCESS);
1599 }
1600 
1601 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1602 {
1603   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1604 
1605   PetscFunctionBegin;
1606   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1607   if (ghosts) *ghosts = aij->garray;
1608   PetscFunctionReturn(PETSC_SUCCESS);
1609 }
1610 
1611 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1612 {
1613   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1614   Mat            A = mat->A, B = mat->B;
1615   PetscLogDouble isend[5], irecv[5];
1616 
1617   PetscFunctionBegin;
1618   info->block_size = 1.0;
1619   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1620 
1621   isend[0] = info->nz_used;
1622   isend[1] = info->nz_allocated;
1623   isend[2] = info->nz_unneeded;
1624   isend[3] = info->memory;
1625   isend[4] = info->mallocs;
1626 
1627   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1628 
1629   isend[0] += info->nz_used;
1630   isend[1] += info->nz_allocated;
1631   isend[2] += info->nz_unneeded;
1632   isend[3] += info->memory;
1633   isend[4] += info->mallocs;
1634   if (flag == MAT_LOCAL) {
1635     info->nz_used      = isend[0];
1636     info->nz_allocated = isend[1];
1637     info->nz_unneeded  = isend[2];
1638     info->memory       = isend[3];
1639     info->mallocs      = isend[4];
1640   } else if (flag == MAT_GLOBAL_MAX) {
1641     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1642 
1643     info->nz_used      = irecv[0];
1644     info->nz_allocated = irecv[1];
1645     info->nz_unneeded  = irecv[2];
1646     info->memory       = irecv[3];
1647     info->mallocs      = irecv[4];
1648   } else if (flag == MAT_GLOBAL_SUM) {
1649     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1650 
1651     info->nz_used      = irecv[0];
1652     info->nz_allocated = irecv[1];
1653     info->nz_unneeded  = irecv[2];
1654     info->memory       = irecv[3];
1655     info->mallocs      = irecv[4];
1656   }
1657   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1658   info->fill_ratio_needed = 0;
1659   info->factor_mallocs    = 0;
1660   PetscFunctionReturn(PETSC_SUCCESS);
1661 }
1662 
1663 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
1664 {
1665   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1666 
1667   PetscFunctionBegin;
1668   switch (op) {
1669   case MAT_NEW_NONZERO_LOCATIONS:
1670   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1671   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1672   case MAT_KEEP_NONZERO_PATTERN:
1673   case MAT_NEW_NONZERO_LOCATION_ERR:
1674   case MAT_USE_INODES:
1675   case MAT_IGNORE_ZERO_ENTRIES:
1676   case MAT_FORM_EXPLICIT_TRANSPOSE:
1677     MatCheckPreallocated(A, 1);
1678     PetscCall(MatSetOption(a->A, op, flg));
1679     PetscCall(MatSetOption(a->B, op, flg));
1680     break;
1681   case MAT_ROW_ORIENTED:
1682     MatCheckPreallocated(A, 1);
1683     a->roworiented = flg;
1684 
1685     PetscCall(MatSetOption(a->A, op, flg));
1686     PetscCall(MatSetOption(a->B, op, flg));
1687     break;
1688   case MAT_FORCE_DIAGONAL_ENTRIES:
1689   case MAT_SORTED_FULL:
1690     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1691     break;
1692   case MAT_IGNORE_OFF_PROC_ENTRIES:
1693     a->donotstash = flg;
1694     break;
1695   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1696   case MAT_SPD:
1697   case MAT_SYMMETRIC:
1698   case MAT_STRUCTURALLY_SYMMETRIC:
1699   case MAT_HERMITIAN:
1700   case MAT_SYMMETRY_ETERNAL:
1701   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1702   case MAT_SPD_ETERNAL:
1703     /* if the diagonal matrix is square it inherits some of the properties above */
1704     break;
1705   case MAT_SUBMAT_SINGLEIS:
1706     A->submat_singleis = flg;
1707     break;
1708   case MAT_STRUCTURE_ONLY:
1709     /* The option is handled directly by MatSetOption() */
1710     break;
1711   default:
1712     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1713   }
1714   PetscFunctionReturn(PETSC_SUCCESS);
1715 }
1716 
1717 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1718 {
1719   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
1720   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1721   PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
1722   PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
1723   PetscInt    *cmap, *idx_p;
1724 
1725   PetscFunctionBegin;
1726   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1727   mat->getrowactive = PETSC_TRUE;
1728 
1729   if (!mat->rowvalues && (idx || v)) {
1730     /*
1731         allocate enough space to hold information from the longest row.
1732     */
1733     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
1734     PetscInt    max = 1, tmp;
1735     for (i = 0; i < matin->rmap->n; i++) {
1736       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1737       if (max < tmp) max = tmp;
1738     }
1739     PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
1740   }
1741 
1742   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1743   lrow = row - rstart;
1744 
1745   pvA = &vworkA;
1746   pcA = &cworkA;
1747   pvB = &vworkB;
1748   pcB = &cworkB;
1749   if (!v) {
1750     pvA = NULL;
1751     pvB = NULL;
1752   }
1753   if (!idx) {
1754     pcA = NULL;
1755     if (!v) pcB = NULL;
1756   }
1757   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1758   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1759   nztot = nzA + nzB;
1760 
1761   cmap = mat->garray;
1762   if (v || idx) {
1763     if (nztot) {
1764       /* Sort by increasing column numbers, assuming A and B already sorted */
1765       PetscInt imark = -1;
1766       if (v) {
1767         *v = v_p = mat->rowvalues;
1768         for (i = 0; i < nzB; i++) {
1769           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1770           else break;
1771         }
1772         imark = i;
1773         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1774         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1775       }
1776       if (idx) {
1777         *idx = idx_p = mat->rowindices;
1778         if (imark > -1) {
1779           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
1780         } else {
1781           for (i = 0; i < nzB; i++) {
1782             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1783             else break;
1784           }
1785           imark = i;
1786         }
1787         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
1788         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
1789       }
1790     } else {
1791       if (idx) *idx = NULL;
1792       if (v) *v = NULL;
1793     }
1794   }
1795   *nz = nztot;
1796   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1797   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1798   PetscFunctionReturn(PETSC_SUCCESS);
1799 }
1800 
1801 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1802 {
1803   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1804 
1805   PetscFunctionBegin;
1806   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1807   aij->getrowactive = PETSC_FALSE;
1808   PetscFunctionReturn(PETSC_SUCCESS);
1809 }
1810 
1811 PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
1812 {
1813   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
1814   Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
1815   PetscInt         i, j, cstart = mat->cmap->rstart;
1816   PetscReal        sum = 0.0;
1817   const MatScalar *v, *amata, *bmata;
1818 
1819   PetscFunctionBegin;
1820   if (aij->size == 1) {
1821     PetscCall(MatNorm(aij->A, type, norm));
1822   } else {
1823     PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
1824     PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
1825     if (type == NORM_FROBENIUS) {
1826       v = amata;
1827       for (i = 0; i < amat->nz; i++) {
1828         sum += PetscRealPart(PetscConj(*v) * (*v));
1829         v++;
1830       }
1831       v = bmata;
1832       for (i = 0; i < bmat->nz; i++) {
1833         sum += PetscRealPart(PetscConj(*v) * (*v));
1834         v++;
1835       }
1836       PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1837       *norm = PetscSqrtReal(*norm);
1838       PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
1839     } else if (type == NORM_1) { /* max column norm */
1840       PetscReal *tmp, *tmp2;
1841       PetscInt  *jj, *garray = aij->garray;
1842       PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
1843       PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
1844       *norm = 0.0;
1845       v     = amata;
1846       jj    = amat->j;
1847       for (j = 0; j < amat->nz; j++) {
1848         tmp[cstart + *jj++] += PetscAbsScalar(*v);
1849         v++;
1850       }
1851       v  = bmata;
1852       jj = bmat->j;
1853       for (j = 0; j < bmat->nz; j++) {
1854         tmp[garray[*jj++]] += PetscAbsScalar(*v);
1855         v++;
1856       }
1857       PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1858       for (j = 0; j < mat->cmap->N; j++) {
1859         if (tmp2[j] > *norm) *norm = tmp2[j];
1860       }
1861       PetscCall(PetscFree(tmp));
1862       PetscCall(PetscFree(tmp2));
1863       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1864     } else if (type == NORM_INFINITY) { /* max row norm */
1865       PetscReal ntemp = 0.0;
1866       for (j = 0; j < aij->A->rmap->n; j++) {
1867         v   = amata + amat->i[j];
1868         sum = 0.0;
1869         for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
1870           sum += PetscAbsScalar(*v);
1871           v++;
1872         }
1873         v = bmata + bmat->i[j];
1874         for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
1875           sum += PetscAbsScalar(*v);
1876           v++;
1877         }
1878         if (sum > ntemp) ntemp = sum;
1879       }
1880       PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
1881       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1882     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
1883     PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
1884     PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
1885   }
1886   PetscFunctionReturn(PETSC_SUCCESS);
1887 }
1888 
1889 PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
1890 {
1891   Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
1892   Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
1893   PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
1894   const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
1895   Mat              B, A_diag, *B_diag;
1896   const MatScalar *pbv, *bv;
1897 
1898   PetscFunctionBegin;
1899   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1900   ma = A->rmap->n;
1901   na = A->cmap->n;
1902   mb = a->B->rmap->n;
1903   nb = a->B->cmap->n;
1904   ai = Aloc->i;
1905   aj = Aloc->j;
1906   bi = Bloc->i;
1907   bj = Bloc->j;
1908   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1909     PetscInt            *d_nnz, *g_nnz, *o_nnz;
1910     PetscSFNode         *oloc;
1911     PETSC_UNUSED PetscSF sf;
1912 
1913     PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
1914     /* compute d_nnz for preallocation */
1915     PetscCall(PetscArrayzero(d_nnz, na));
1916     for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
1917     /* compute local off-diagonal contributions */
1918     PetscCall(PetscArrayzero(g_nnz, nb));
1919     for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
1920     /* map those to global */
1921     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1922     PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
1923     PetscCall(PetscSFSetFromOptions(sf));
1924     PetscCall(PetscArrayzero(o_nnz, na));
1925     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1926     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1927     PetscCall(PetscSFDestroy(&sf));
1928 
1929     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1930     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1931     PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
1932     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1933     PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
1934     PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
1935   } else {
1936     B = *matout;
1937     PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
1938   }
1939 
1940   b           = (Mat_MPIAIJ *)B->data;
1941   A_diag      = a->A;
1942   B_diag      = &b->A;
1943   sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
1944   A_diag_ncol = A_diag->cmap->N;
1945   B_diag_ilen = sub_B_diag->ilen;
1946   B_diag_i    = sub_B_diag->i;
1947 
1948   /* Set ilen for diagonal of B */
1949   for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];
1950 
1951   /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
1952   very quickly (=without using MatSetValues), because all writes are local. */
1953   PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
1954   PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));
1955 
1956   /* copy over the B part */
1957   PetscCall(PetscMalloc1(bi[mb], &cols));
1958   PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
1959   pbv = bv;
1960   row = A->rmap->rstart;
1961   for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
1962   cols_tmp = cols;
1963   for (i = 0; i < mb; i++) {
1964     ncol = bi[i + 1] - bi[i];
1965     PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
1966     row++;
1967     pbv += ncol;
1968     cols_tmp += ncol;
1969   }
1970   PetscCall(PetscFree(cols));
1971   PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));
1972 
1973   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1974   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1975   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1976     *matout = B;
1977   } else {
1978     PetscCall(MatHeaderMerge(A, &B));
1979   }
1980   PetscFunctionReturn(PETSC_SUCCESS);
1981 }
1982 
1983 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
1984 {
1985   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1986   Mat         a = aij->A, b = aij->B;
1987   PetscInt    s1, s2, s3;
1988 
1989   PetscFunctionBegin;
1990   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1991   if (rr) {
1992     PetscCall(VecGetLocalSize(rr, &s1));
1993     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1994     /* Overlap communication with computation. */
1995     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1996   }
1997   if (ll) {
1998     PetscCall(VecGetLocalSize(ll, &s1));
1999     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
2000     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
2001   }
2002   /* scale  the diagonal block */
2003   PetscUseTypeMethod(a, diagonalscale, ll, rr);
2004 
2005   if (rr) {
2006     /* Do a scatter end and then right scale the off-diagonal block */
2007     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
2008     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
2009   }
2010   PetscFunctionReturn(PETSC_SUCCESS);
2011 }
2012 
2013 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2014 {
2015   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2016 
2017   PetscFunctionBegin;
2018   PetscCall(MatSetUnfactored(a->A));
2019   PetscFunctionReturn(PETSC_SUCCESS);
2020 }
2021 
2022 PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2023 {
2024   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2025   Mat         a, b, c, d;
2026   PetscBool   flg;
2027 
2028   PetscFunctionBegin;
2029   a = matA->A;
2030   b = matA->B;
2031   c = matB->A;
2032   d = matB->B;
2033 
2034   PetscCall(MatEqual(a, c, &flg));
2035   if (flg) PetscCall(MatEqual(b, d, &flg));
2036   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2037   PetscFunctionReturn(PETSC_SUCCESS);
2038 }
2039 
2040 PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2041 {
2042   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2043   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2044 
2045   PetscFunctionBegin;
2046   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2047   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2048     /* because of the column compression in the off-processor part of the matrix a->B,
2049        the number of columns in a->B and b->B may be different, hence we cannot call
2050        the MatCopy() directly on the two parts. If need be, we can provide a more
2051        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2052        then copying the submatrices */
2053     PetscCall(MatCopy_Basic(A, B, str));
2054   } else {
2055     PetscCall(MatCopy(a->A, b->A, str));
2056     PetscCall(MatCopy(a->B, b->B, str));
2057   }
2058   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2059   PetscFunctionReturn(PETSC_SUCCESS);
2060 }
2061 
2062 /*
2063    Computes the number of nonzeros per row needed for preallocation when X and Y
2064    have different nonzero structure.
2065 */
2066 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2067 {
2068   PetscInt i, j, k, nzx, nzy;
2069 
2070   PetscFunctionBegin;
2071   /* Set the number of nonzeros in the new matrix */
2072   for (i = 0; i < m; i++) {
2073     const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i];
2074     nzx    = xi[i + 1] - xi[i];
2075     nzy    = yi[i + 1] - yi[i];
2076     nnz[i] = 0;
2077     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2078       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2079       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2080       nnz[i]++;
2081     }
2082     for (; k < nzy; k++) nnz[i]++;
2083   }
2084   PetscFunctionReturn(PETSC_SUCCESS);
2085 }
2086 
2087 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2088 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2089 {
2090   PetscInt    m = Y->rmap->N;
2091   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2092   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2093 
2094   PetscFunctionBegin;
2095   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2096   PetscFunctionReturn(PETSC_SUCCESS);
2097 }
2098 
2099 PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2100 {
2101   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;
2102 
2103   PetscFunctionBegin;
2104   if (str == SAME_NONZERO_PATTERN) {
2105     PetscCall(MatAXPY(yy->A, a, xx->A, str));
2106     PetscCall(MatAXPY(yy->B, a, xx->B, str));
2107   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2108     PetscCall(MatAXPY_Basic(Y, a, X, str));
2109   } else {
2110     Mat       B;
2111     PetscInt *nnz_d, *nnz_o;
2112 
2113     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
2114     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
2115     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2116     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2117     PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
2118     PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
2119     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
2120     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
2121     PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
2122     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2123     PetscCall(MatHeaderMerge(Y, &B));
2124     PetscCall(PetscFree(nnz_d));
2125     PetscCall(PetscFree(nnz_o));
2126   }
2127   PetscFunctionReturn(PETSC_SUCCESS);
2128 }
2129 
2130 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2131 
2132 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2133 {
2134   PetscFunctionBegin;
2135   if (PetscDefined(USE_COMPLEX)) {
2136     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2137 
2138     PetscCall(MatConjugate_SeqAIJ(aij->A));
2139     PetscCall(MatConjugate_SeqAIJ(aij->B));
2140   }
2141   PetscFunctionReturn(PETSC_SUCCESS);
2142 }
2143 
2144 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2145 {
2146   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2147 
2148   PetscFunctionBegin;
2149   PetscCall(MatRealPart(a->A));
2150   PetscCall(MatRealPart(a->B));
2151   PetscFunctionReturn(PETSC_SUCCESS);
2152 }
2153 
2154 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2155 {
2156   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2157 
2158   PetscFunctionBegin;
2159   PetscCall(MatImaginaryPart(a->A));
2160   PetscCall(MatImaginaryPart(a->B));
2161   PetscFunctionReturn(PETSC_SUCCESS);
2162 }
2163 
2164 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2165 {
2166   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2167   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2168   PetscScalar       *va, *vv;
2169   Vec                vB, vA;
2170   const PetscScalar *vb;
2171 
2172   PetscFunctionBegin;
2173   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
2174   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2175 
2176   PetscCall(VecGetArrayWrite(vA, &va));
2177   if (idx) {
2178     for (i = 0; i < m; i++) {
2179       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2180     }
2181   }
2182 
2183   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
2184   PetscCall(PetscMalloc1(m, &idxb));
2185   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2186 
2187   PetscCall(VecGetArrayWrite(v, &vv));
2188   PetscCall(VecGetArrayRead(vB, &vb));
2189   for (i = 0; i < m; i++) {
2190     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2191       vv[i] = vb[i];
2192       if (idx) idx[i] = a->garray[idxb[i]];
2193     } else {
2194       vv[i] = va[i];
2195       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2196     }
2197   }
2198   PetscCall(VecRestoreArrayWrite(vA, &vv));
2199   PetscCall(VecRestoreArrayWrite(vA, &va));
2200   PetscCall(VecRestoreArrayRead(vB, &vb));
2201   PetscCall(PetscFree(idxb));
2202   PetscCall(VecDestroy(&vA));
2203   PetscCall(VecDestroy(&vB));
2204   PetscFunctionReturn(PETSC_SUCCESS);
2205 }
2206 
/*
  MatGetRowMinAbs_MPIAIJ - For every locally owned row, finds the entry of smallest absolute value.

  Input Parameter:
. A - the MATMPIAIJ matrix

  Output Parameters:
+ v   - receives, per local row, the selected value
- idx - if not NULL, receives per local row the global column of the selected value

  Notes:
  The diagonal block is handled by MatGetRowMinAbs() on mat->A. For the off-diagonal block, a column
  that is not stored in a row is treated as an implicit 0.0, and the global column of the first such
  hole is recorded. When the diagonal and off-diagonal candidates have equal absolute value, the
  diagonal value is returned together with the smaller of the two global column indices.
*/
2207 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2208 {
2209   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2210   PetscInt           m = A->rmap->n, n = A->cmap->n;
2211   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2212   PetscInt          *cmap = mat->garray;
2213   PetscInt          *diagIdx, *offdiagIdx;
2214   Vec                diagV, offdiagV;
2215   PetscScalar       *a, *diagA, *offdiagA;
2216   const PetscScalar *ba, *bav;
2217   PetscInt           r, j, col, ncols, *bi, *bj;
2218   Mat                B = mat->B;
2219   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2220 
2221   PetscFunctionBegin;
2222   /* This process owns every column of A (N == n): only the diagonal block matters, so work directly on v's array */
2223   if (A->cmap->N == n) {
2224     PetscCall(VecGetArrayWrite(v, &diagA));
2225     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2226     PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
2227     PetscCall(VecDestroy(&diagV));
2228     PetscCall(VecRestoreArrayWrite(v, &diagA));
2229     PetscFunctionReturn(PETSC_SUCCESS);
2230   } else if (n == 0) {
      /* No locally owned columns: report 0.0 with column index -1 for every local row */
2231     if (m) {
2232       PetscCall(VecGetArrayWrite(v, &a));
2233       for (r = 0; r < m; r++) {
2234         a[r] = 0.0;
2235         if (idx) idx[r] = -1;
2236       }
2237       PetscCall(VecRestoreArrayWrite(v, &a));
2238     }
2239     PetscFunctionReturn(PETSC_SUCCESS);
2240   }
2241 
2242   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2243   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2244   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2245   PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));
2246 
2247   /* Get offdiagIdx[] for implicit 0.0 */
2248   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
    /* ba and bj are advanced by ncols at the end of every row iteration below, so inside the loop they address the current row's entries */
2249   ba = bav;
2250   bi = b->i;
2251   bj = b->j;
2252   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2253   for (r = 0; r < m; r++) {
2254     ncols = bi[r + 1] - bi[r];
2255     if (ncols == A->cmap->N - n) { /* Brow is dense */
2256       offdiagA[r]   = *ba;
2257       offdiagIdx[r] = cmap[0];
2258     } else { /* Brow is sparse, so an implicit 0.0 exists and the minimum absolute value is 0.0 */
2259       offdiagA[r] = 0.0;
2260 
2261       /* Find first hole in the cmap */
2262       for (j = 0; j < ncols; j++) {
2263         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2264         if (col > j && j < cstart) {
2265           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2266           break;
2267         } else if (col > j + n && j >= cstart) {
2268           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2269           break;
2270         }
2271       }
2272       if (j == ncols && ncols < A->cmap->N - n) {
2273         /* a hole is outside compressed Bcols */
2274         if (ncols == 0) {
2275           if (cstart) {
2276             offdiagIdx[r] = 0;
2277           } else offdiagIdx[r] = cend;
2278         } else { /* ncols > 0 */
2279           offdiagIdx[r] = cmap[ncols - 1] + 1;
            /* skip over the locally owned column range */
2280           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2281         }
2282       }
2283     }
2284 
      /* Compare the current candidate against every stored entry of this row */
2285     for (j = 0; j < ncols; j++) {
2286       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
2287         offdiagA[r]   = *ba;
2288         offdiagIdx[r] = cmap[*bj];
2289       }
2290       ba++;
2291       bj++;
2292     }
2293   }
2294 
    /* Combine the diagonal and off-diagonal candidates row by row */
2295   PetscCall(VecGetArrayWrite(v, &a));
2296   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2297   for (r = 0; r < m; ++r) {
2298     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2299       a[r] = diagA[r];
2300       if (idx) idx[r] = cstart + diagIdx[r];
2301     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
        /* tie: keep the diagonal value and the smaller global column index */
2302       a[r] = diagA[r];
2303       if (idx) {
2304         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2305           idx[r] = cstart + diagIdx[r];
2306         } else idx[r] = offdiagIdx[r];
2307       }
2308     } else {
2309       a[r] = offdiagA[r];
2310       if (idx) idx[r] = offdiagIdx[r];
2311     }
2312   }
2313   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2314   PetscCall(VecRestoreArrayWrite(v, &a));
2315   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2316   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2317   PetscCall(VecDestroy(&diagV));
2318   PetscCall(VecDestroy(&offdiagV));
2319   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2320   PetscFunctionReturn(PETSC_SUCCESS);
2321 }
2322 
/*
  MatGetRowMin_MPIAIJ - For every locally owned row, finds the entry with the smallest real part.

  Input Parameter:
. A - the MATMPIAIJ matrix

  Output Parameters:
+ v   - receives, per local row, the selected value
- idx - if not NULL, receives per local row the global column of the selected value

  Notes:
  The diagonal block is handled by MatGetRowMin() on mat->A. For the off-diagonal block, a column
  that is not stored in a row is treated as an implicit 0.0, and the global column of the first such
  hole is recorded. Comparisons use PetscRealPart(). When the diagonal and off-diagonal candidates
  compare equal, the diagonal value is returned together with the smaller of the two global column
  indices.
*/
2323 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2324 {
2325   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2326   PetscInt           m = A->rmap->n, n = A->cmap->n;
2327   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2328   PetscInt          *cmap = mat->garray;
2329   PetscInt          *diagIdx, *offdiagIdx;
2330   Vec                diagV, offdiagV;
2331   PetscScalar       *a, *diagA, *offdiagA;
2332   const PetscScalar *ba, *bav;
2333   PetscInt           r, j, col, ncols, *bi, *bj;
2334   Mat                B = mat->B;
2335   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2336 
2337   PetscFunctionBegin;
2338   /* This process owns every column of A (N == n): only the diagonal block matters, so work directly on v's array */
2339   if (A->cmap->N == n) {
2340     PetscCall(VecGetArrayWrite(v, &diagA));
2341     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2342     PetscCall(MatGetRowMin(mat->A, diagV, idx));
2343     PetscCall(VecDestroy(&diagV));
2344     PetscCall(VecRestoreArrayWrite(v, &diagA));
2345     PetscFunctionReturn(PETSC_SUCCESS);
2346   } else if (n == 0) {
      /* No locally owned columns: report PETSC_MAX_REAL with column index -1 for every local row */
2347     if (m) {
2348       PetscCall(VecGetArrayWrite(v, &a));
2349       for (r = 0; r < m; r++) {
2350         a[r] = PETSC_MAX_REAL;
2351         if (idx) idx[r] = -1;
2352       }
2353       PetscCall(VecRestoreArrayWrite(v, &a));
2354     }
2355     PetscFunctionReturn(PETSC_SUCCESS);
2356   }
2357 
2358   PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
2359   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2360   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2361   PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));
2362 
2363   /* Get offdiagIdx[] for implicit 0.0 */
2364   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
    /* ba and bj are advanced by ncols at the end of every row iteration below, so inside the loop they address the current row's entries */
2365   ba = bav;
2366   bi = b->i;
2367   bj = b->j;
2368   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2369   for (r = 0; r < m; r++) {
2370     ncols = bi[r + 1] - bi[r];
2371     if (ncols == A->cmap->N - n) { /* Brow is dense */
2372       offdiagA[r]   = *ba;
2373       offdiagIdx[r] = cmap[0];
2374     } else { /* Brow is sparse, so an implicit 0.0 exists and the minimum is 0.0 or lower */
2375       offdiagA[r] = 0.0;
2376 
2377       /* Find first hole in the cmap */
2378       for (j = 0; j < ncols; j++) {
2379         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2380         if (col > j && j < cstart) {
2381           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2382           break;
2383         } else if (col > j + n && j >= cstart) {
2384           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2385           break;
2386         }
2387       }
2388       if (j == ncols && ncols < A->cmap->N - n) {
2389         /* a hole is outside compressed Bcols */
2390         if (ncols == 0) {
2391           if (cstart) {
2392             offdiagIdx[r] = 0;
2393           } else offdiagIdx[r] = cend;
2394         } else { /* ncols > 0 */
2395           offdiagIdx[r] = cmap[ncols - 1] + 1;
            /* skip over the locally owned column range */
2396           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2397         }
2398       }
2399     }
2400 
      /* Compare the current candidate against every stored entry of this row */
2401     for (j = 0; j < ncols; j++) {
2402       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
2403         offdiagA[r]   = *ba;
2404         offdiagIdx[r] = cmap[*bj];
2405       }
2406       ba++;
2407       bj++;
2408     }
2409   }
2410 
    /* Combine the diagonal and off-diagonal candidates row by row */
2411   PetscCall(VecGetArrayWrite(v, &a));
2412   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2413   for (r = 0; r < m; ++r) {
2414     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2415       a[r] = diagA[r];
2416       if (idx) idx[r] = cstart + diagIdx[r];
2417     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
        /* tie: keep the diagonal value and the smaller global column index */
2418       a[r] = diagA[r];
2419       if (idx) {
2420         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2421           idx[r] = cstart + diagIdx[r];
2422         } else idx[r] = offdiagIdx[r];
2423       }
2424     } else {
2425       a[r] = offdiagA[r];
2426       if (idx) idx[r] = offdiagIdx[r];
2427     }
2428   }
2429   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2430   PetscCall(VecRestoreArrayWrite(v, &a));
2431   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2432   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2433   PetscCall(VecDestroy(&diagV));
2434   PetscCall(VecDestroy(&offdiagV));
2435   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2436   PetscFunctionReturn(PETSC_SUCCESS);
2437 }
2438 
/*
  MatGetRowMax_MPIAIJ - For every locally owned row, finds the entry with the largest real part.

  Input Parameter:
. A - the MATMPIAIJ matrix

  Output Parameters:
+ v   - receives, per local row, the selected value
- idx - if not NULL, receives per local row the global column of the selected value

  Notes:
  The diagonal block is handled by MatGetRowMax() on mat->A. For the off-diagonal block, a column
  that is not stored in a row is treated as an implicit 0.0, and the global column of the first such
  hole is recorded. Comparisons use PetscRealPart(). When the diagonal and off-diagonal candidates
  compare equal, the diagonal value is returned together with the smaller of the two global column
  indices.
*/
2439 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2440 {
2441   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2442   PetscInt           m = A->rmap->n, n = A->cmap->n;
2443   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2444   PetscInt          *cmap = mat->garray;
2445   PetscInt          *diagIdx, *offdiagIdx;
2446   Vec                diagV, offdiagV;
2447   PetscScalar       *a, *diagA, *offdiagA;
2448   const PetscScalar *ba, *bav;
2449   PetscInt           r, j, col, ncols, *bi, *bj;
2450   Mat                B = mat->B;
2451   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2452 
2453   PetscFunctionBegin;
2454   /* This process owns every column of A (N == n): only the diagonal block matters, so work directly on v's array */
2455   if (A->cmap->N == n) {
2456     PetscCall(VecGetArrayWrite(v, &diagA));
2457     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2458     PetscCall(MatGetRowMax(mat->A, diagV, idx));
2459     PetscCall(VecDestroy(&diagV));
2460     PetscCall(VecRestoreArrayWrite(v, &diagA));
2461     PetscFunctionReturn(PETSC_SUCCESS);
2462   } else if (n == 0) {
      /* No locally owned columns: report PETSC_MIN_REAL with column index -1 for every local row */
2463     if (m) {
2464       PetscCall(VecGetArrayWrite(v, &a));
2465       for (r = 0; r < m; r++) {
2466         a[r] = PETSC_MIN_REAL;
2467         if (idx) idx[r] = -1;
2468       }
2469       PetscCall(VecRestoreArrayWrite(v, &a));
2470     }
2471     PetscFunctionReturn(PETSC_SUCCESS);
2472   }
2473 
2474   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2475   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2476   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2477   PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));
2478 
2479   /* Get offdiagIdx[] for implicit 0.0 */
2480   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
    /* ba and bj are advanced by ncols at the end of every row iteration below, so inside the loop they address the current row's entries */
2481   ba = bav;
2482   bi = b->i;
2483   bj = b->j;
2484   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2485   for (r = 0; r < m; r++) {
2486     ncols = bi[r + 1] - bi[r];
2487     if (ncols == A->cmap->N - n) { /* Brow is dense */
2488       offdiagA[r]   = *ba;
2489       offdiagIdx[r] = cmap[0];
2490     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2491       offdiagA[r] = 0.0;
2492 
2493       /* Find first hole in the cmap */
2494       for (j = 0; j < ncols; j++) {
2495         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2496         if (col > j && j < cstart) {
2497           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2498           break;
2499         } else if (col > j + n && j >= cstart) {
2500           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2501           break;
2502         }
2503       }
2504       if (j == ncols && ncols < A->cmap->N - n) {
2505         /* a hole is outside compressed Bcols */
2506         if (ncols == 0) {
2507           if (cstart) {
2508             offdiagIdx[r] = 0;
2509           } else offdiagIdx[r] = cend;
2510         } else { /* ncols > 0 */
2511           offdiagIdx[r] = cmap[ncols - 1] + 1;
            /* skip over the locally owned column range */
2512           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2513         }
2514       }
2515     }
2516 
      /* Compare the current candidate against every stored entry of this row */
2517     for (j = 0; j < ncols; j++) {
2518       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
2519         offdiagA[r]   = *ba;
2520         offdiagIdx[r] = cmap[*bj];
2521       }
2522       ba++;
2523       bj++;
2524     }
2525   }
2526 
    /* Combine the diagonal and off-diagonal candidates row by row */
2527   PetscCall(VecGetArrayWrite(v, &a));
2528   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2529   for (r = 0; r < m; ++r) {
2530     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2531       a[r] = diagA[r];
2532       if (idx) idx[r] = cstart + diagIdx[r];
2533     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
        /* tie: keep the diagonal value and the smaller global column index */
2534       a[r] = diagA[r];
2535       if (idx) {
2536         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2537           idx[r] = cstart + diagIdx[r];
2538         } else idx[r] = offdiagIdx[r];
2539       }
2540     } else {
2541       a[r] = offdiagA[r];
2542       if (idx) idx[r] = offdiagIdx[r];
2543     }
2544   }
2545   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2546   PetscCall(VecRestoreArrayWrite(v, &a));
2547   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2548   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2549   PetscCall(VecDestroy(&diagV));
2550   PetscCall(VecDestroy(&offdiagV));
2551   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2552   PetscFunctionReturn(PETSC_SUCCESS);
2553 }
2554 
2555 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2556 {
2557   Mat *dummy;
2558 
2559   PetscFunctionBegin;
2560   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2561   *newmat = *dummy;
2562   PetscCall(PetscFree(dummy));
2563   PetscFunctionReturn(PETSC_SUCCESS);
2564 }
2565 
2566 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2567 {
2568   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2569 
2570   PetscFunctionBegin;
2571   PetscCall(MatInvertBlockDiagonal(a->A, values));
2572   A->factorerrortype = a->A->factorerrortype;
2573   PetscFunctionReturn(PETSC_SUCCESS);
2574 }
2575 
2576 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2577 {
2578   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2579 
2580   PetscFunctionBegin;
2581   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2582   PetscCall(MatSetRandom(aij->A, rctx));
2583   if (x->assembled) {
2584     PetscCall(MatSetRandom(aij->B, rctx));
2585   } else {
2586     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2587   }
2588   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2589   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2590   PetscFunctionReturn(PETSC_SUCCESS);
2591 }
2592 
2593 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2594 {
2595   PetscFunctionBegin;
2596   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2597   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2598   PetscFunctionReturn(PETSC_SUCCESS);
2599 }
2600 
2601 /*@
2602   MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2603 
2604   Not Collective
2605 
2606   Input Parameter:
2607 . A - the matrix
2608 
2609   Output Parameter:
2610 . nz - the number of nonzeros
2611 
2612   Level: advanced
2613 
2614 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2615 @*/
2616 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2617 {
2618   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2619   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2620   PetscBool   isaij;
2621 
2622   PetscFunctionBegin;
2623   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
2624   PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
2625   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2626   PetscFunctionReturn(PETSC_SUCCESS);
2627 }
2628 
2629 /*@
2630   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2631 
2632   Collective
2633 
2634   Input Parameters:
2635 + A  - the matrix
2636 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)
2637 
2638   Level: advanced
2639 
2640 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2641 @*/
2642 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
2643 {
2644   PetscFunctionBegin;
2645   PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
2646   PetscFunctionReturn(PETSC_SUCCESS);
2647 }
2648 
2649 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
2650 {
2651   PetscBool sc = PETSC_FALSE, flg;
2652 
2653   PetscFunctionBegin;
2654   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2655   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2656   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2657   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2658   PetscOptionsHeadEnd();
2659   PetscFunctionReturn(PETSC_SUCCESS);
2660 }
2661 
2662 PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
2663 {
2664   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
2665   Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;
2666 
2667   PetscFunctionBegin;
2668   if (!Y->preallocated) {
2669     PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
2670   } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
2671     PetscInt nonew = aij->nonew;
2672     PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
2673     aij->nonew = nonew;
2674   }
2675   PetscCall(MatShift_Basic(Y, a));
2676   PetscFunctionReturn(PETSC_SUCCESS);
2677 }
2678 
2679 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2680 {
2681   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2682 
2683   PetscFunctionBegin;
2684   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2685   PetscCall(MatMissingDiagonal(a->A, missing, d));
2686   if (d) {
2687     PetscInt rstart;
2688     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2689     *d += rstart;
2690   }
2691   PetscFunctionReturn(PETSC_SUCCESS);
2692 }
2693 
2694 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2695 {
2696   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2697 
2698   PetscFunctionBegin;
2699   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2700   PetscFunctionReturn(PETSC_SUCCESS);
2701 }
2702 
2703 PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A)
2704 {
2705   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2706 
2707   PetscFunctionBegin;
2708   PetscCall(MatEliminateZeros(a->A));
2709   PetscCall(MatEliminateZeros(a->B));
2710   PetscFunctionReturn(PETSC_SUCCESS);
2711 }
2712 
2713 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2714                                        MatGetRow_MPIAIJ,
2715                                        MatRestoreRow_MPIAIJ,
2716                                        MatMult_MPIAIJ,
2717                                        /* 4*/ MatMultAdd_MPIAIJ,
2718                                        MatMultTranspose_MPIAIJ,
2719                                        MatMultTransposeAdd_MPIAIJ,
2720                                        NULL,
2721                                        NULL,
2722                                        NULL,
2723                                        /*10*/ NULL,
2724                                        NULL,
2725                                        NULL,
2726                                        MatSOR_MPIAIJ,
2727                                        MatTranspose_MPIAIJ,
2728                                        /*15*/ MatGetInfo_MPIAIJ,
2729                                        MatEqual_MPIAIJ,
2730                                        MatGetDiagonal_MPIAIJ,
2731                                        MatDiagonalScale_MPIAIJ,
2732                                        MatNorm_MPIAIJ,
2733                                        /*20*/ MatAssemblyBegin_MPIAIJ,
2734                                        MatAssemblyEnd_MPIAIJ,
2735                                        MatSetOption_MPIAIJ,
2736                                        MatZeroEntries_MPIAIJ,
2737                                        /*24*/ MatZeroRows_MPIAIJ,
2738                                        NULL,
2739                                        NULL,
2740                                        NULL,
2741                                        NULL,
2742                                        /*29*/ MatSetUp_MPI_Hash,
2743                                        NULL,
2744                                        NULL,
2745                                        MatGetDiagonalBlock_MPIAIJ,
2746                                        NULL,
2747                                        /*34*/ MatDuplicate_MPIAIJ,
2748                                        NULL,
2749                                        NULL,
2750                                        NULL,
2751                                        NULL,
2752                                        /*39*/ MatAXPY_MPIAIJ,
2753                                        MatCreateSubMatrices_MPIAIJ,
2754                                        MatIncreaseOverlap_MPIAIJ,
2755                                        MatGetValues_MPIAIJ,
2756                                        MatCopy_MPIAIJ,
2757                                        /*44*/ MatGetRowMax_MPIAIJ,
2758                                        MatScale_MPIAIJ,
2759                                        MatShift_MPIAIJ,
2760                                        MatDiagonalSet_MPIAIJ,
2761                                        MatZeroRowsColumns_MPIAIJ,
2762                                        /*49*/ MatSetRandom_MPIAIJ,
2763                                        MatGetRowIJ_MPIAIJ,
2764                                        MatRestoreRowIJ_MPIAIJ,
2765                                        NULL,
2766                                        NULL,
2767                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2768                                        NULL,
2769                                        MatSetUnfactored_MPIAIJ,
2770                                        MatPermute_MPIAIJ,
2771                                        NULL,
2772                                        /*59*/ MatCreateSubMatrix_MPIAIJ,
2773                                        MatDestroy_MPIAIJ,
2774                                        MatView_MPIAIJ,
2775                                        NULL,
2776                                        NULL,
2777                                        /*64*/ NULL,
2778                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2779                                        NULL,
2780                                        NULL,
2781                                        NULL,
2782                                        /*69*/ MatGetRowMaxAbs_MPIAIJ,
2783                                        MatGetRowMinAbs_MPIAIJ,
2784                                        NULL,
2785                                        NULL,
2786                                        NULL,
2787                                        NULL,
2788                                        /*75*/ MatFDColoringApply_AIJ,
2789                                        MatSetFromOptions_MPIAIJ,
2790                                        NULL,
2791                                        NULL,
2792                                        MatFindZeroDiagonals_MPIAIJ,
2793                                        /*80*/ NULL,
2794                                        NULL,
2795                                        NULL,
2796                                        /*83*/ MatLoad_MPIAIJ,
2797                                        MatIsSymmetric_MPIAIJ,
2798                                        NULL,
2799                                        NULL,
2800                                        NULL,
2801                                        NULL,
2802                                        /*89*/ NULL,
2803                                        NULL,
2804                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2805                                        NULL,
2806                                        NULL,
2807                                        /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2808                                        NULL,
2809                                        NULL,
2810                                        NULL,
2811                                        MatBindToCPU_MPIAIJ,
2812                                        /*99*/ MatProductSetFromOptions_MPIAIJ,
2813                                        NULL,
2814                                        NULL,
2815                                        MatConjugate_MPIAIJ,
2816                                        NULL,
2817                                        /*104*/ MatSetValuesRow_MPIAIJ,
2818                                        MatRealPart_MPIAIJ,
2819                                        MatImaginaryPart_MPIAIJ,
2820                                        NULL,
2821                                        NULL,
2822                                        /*109*/ NULL,
2823                                        NULL,
2824                                        MatGetRowMin_MPIAIJ,
2825                                        NULL,
2826                                        MatMissingDiagonal_MPIAIJ,
2827                                        /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
2828                                        NULL,
2829                                        MatGetGhosts_MPIAIJ,
2830                                        NULL,
2831                                        NULL,
2832                                        /*119*/ MatMultDiagonalBlock_MPIAIJ,
2833                                        NULL,
2834                                        NULL,
2835                                        NULL,
2836                                        MatGetMultiProcBlock_MPIAIJ,
2837                                        /*124*/ MatFindNonzeroRows_MPIAIJ,
2838                                        MatGetColumnReductions_MPIAIJ,
2839                                        MatInvertBlockDiagonal_MPIAIJ,
2840                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2841                                        MatCreateSubMatricesMPI_MPIAIJ,
2842                                        /*129*/ NULL,
2843                                        NULL,
2844                                        NULL,
2845                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2846                                        NULL,
2847                                        /*134*/ NULL,
2848                                        NULL,
2849                                        NULL,
2850                                        NULL,
2851                                        NULL,
2852                                        /*139*/ MatSetBlockSizes_MPIAIJ,
2853                                        NULL,
2854                                        NULL,
2855                                        MatFDColoringSetUp_MPIXAIJ,
2856                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2857                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2858                                        /*145*/ NULL,
2859                                        NULL,
2860                                        NULL,
2861                                        MatCreateGraph_Simple_AIJ,
2862                                        NULL,
2863                                        /*150*/ NULL,
2864                                        MatEliminateZeros_MPIAIJ};
2865 
2866 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2867 {
2868   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2869 
2870   PetscFunctionBegin;
2871   PetscCall(MatStoreValues(aij->A));
2872   PetscCall(MatStoreValues(aij->B));
2873   PetscFunctionReturn(PETSC_SUCCESS);
2874 }
2875 
2876 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2877 {
2878   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2879 
2880   PetscFunctionBegin;
2881   PetscCall(MatRetrieveValues(aij->A));
2882   PetscCall(MatRetrieveValues(aij->B));
2883   PetscFunctionReturn(PETSC_SUCCESS);
2884 }
2885 
/*
  MatMPIAIJSetPreallocation_MPIAIJ - discards the current sequential blocks of the MPIAIJ matrix B,
  creates fresh MATSEQAIJ blocks on PETSC_COMM_SELF and preallocates them.

  Input Parameters:
+ B     - the matrix whose data is a Mat_MPIAIJ
. d_nz  - passed, together with d_nnz, to MatSeqAIJSetPreallocation() for the A block
. d_nnz - see d_nz
. o_nz  - passed, together with o_nnz, to MatSeqAIJSetPreallocation() for the B block
- o_nnz - see o_nz

  On return B is flagged as preallocated and as neither assembled nor previously assembled.
*/
2886 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2887 {
2888   Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)B->data;
2889   PetscMPIInt nranks;
2890   PetscInt    offN; /* number of columns given to the B block */
2891   PetscFunctionBegin;
2892   if (B->hash_active) {
         /* copy the saved operations table (cops) over B's operations table and clear the hash flag */
2893     B->ops[0]      = mpiaij->cops;
2894     B->hash_active = PETSC_FALSE;
2895   }
2896   PetscCall(PetscLayoutSetUp(B->rmap));
2897   PetscCall(PetscLayoutSetUp(B->cmap));
2898 
       /* the column map, garray, local vector and scatter of the old blocks are released */
2899 #if defined(PETSC_USE_CTABLE)
2900   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
2901 #else
2902   PetscCall(PetscFree(mpiaij->colmap));
2903 #endif
2904   PetscCall(PetscFree(mpiaij->garray));
2905   PetscCall(VecDestroy(&mpiaij->lvec));
2906   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
       /* with a single rank the B block gets zero columns, otherwise the global column count */
2907   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &nranks));
2908   offN = nranks > 1 ? B->cmap->N : 0;
2909   PetscCall(MatDestroy(&mpiaij->B));
2910   PetscCall(MatCreate(PETSC_COMM_SELF, &mpiaij->B));
2911   PetscCall(MatSetSizes(mpiaij->B, B->rmap->n, offN, B->rmap->n, offN));
2912   PetscCall(MatSetBlockSizesFromMats(mpiaij->B, B, B));
2913   PetscCall(MatSetType(mpiaij->B, MATSEQAIJ));
2914 
       /* the A block is local rows by local columns */
2915   PetscCall(MatDestroy(&mpiaij->A));
2916   PetscCall(MatCreate(PETSC_COMM_SELF, &mpiaij->A));
2917   PetscCall(MatSetSizes(mpiaij->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2918   PetscCall(MatSetBlockSizesFromMats(mpiaij->A, B, B));
2919   PetscCall(MatSetType(mpiaij->A, MATSEQAIJ));
2920 
2921   PetscCall(MatSeqAIJSetPreallocation(mpiaij->A, d_nz, d_nnz));
2922   PetscCall(MatSeqAIJSetPreallocation(mpiaij->B, o_nz, o_nnz));
2923   B->assembled     = PETSC_FALSE;
2924   B->was_assembled = PETSC_FALSE;
2925   B->preallocated  = PETSC_TRUE;
2926   PetscFunctionReturn(PETSC_SUCCESS);
2927 }
2928 
/*
  MatResetPreallocation_MPIAIJ - releases the column map, garray, local vector and scatter of B, then calls
  MatResetPreallocation() on its A and B blocks. B is left flagged as preallocated and not assembled.
*/
2929 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2930 {
2931   Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)B->data;
2932 
2933   PetscFunctionBegin;
2934   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2935   PetscCall(PetscLayoutSetUp(B->rmap));
2936   PetscCall(PetscLayoutSetUp(B->cmap));
2937 
       /* release the communication data; this function does not rebuild it */
2938 #if defined(PETSC_USE_CTABLE)
2939   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
2940 #else
2941   PetscCall(PetscFree(mpiaij->colmap));
2942 #endif
2943   PetscCall(PetscFree(mpiaij->garray));
2944   PetscCall(VecDestroy(&mpiaij->lvec));
2945   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
2946 
       /* reset the two sequential blocks, A first */
2947   PetscCall(MatResetPreallocation(mpiaij->A));
2948   PetscCall(MatResetPreallocation(mpiaij->B));
2949   B->assembled     = PETSC_FALSE;
2950   B->was_assembled = PETSC_FALSE;
2951   B->preallocated  = PETSC_TRUE;
2952   PetscFunctionReturn(PETSC_SUCCESS);
2953 }
2954 
/*
  MatDuplicate_MPIAIJ - builds a new matrix with the same communicator, sizes, block sizes and type as matin.

  Input Parameters:
+ matin    - the matrix to duplicate
- cpvalues - duplication option, forwarded unchanged to MatDuplicate() for the A and B parts

  Output Parameter:
. newmat - the new matrix; set to NULL on entry and assigned only as the last step
*/
2955 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2956 {
2957   Mat         mat;
2958   Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;
2959 
2960   PetscFunctionBegin;
2961   *newmat = NULL;
2962   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2963   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2964   PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
2965   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2966   a = (Mat_MPIAIJ *)mat->data;
2967 
       /* copy the state flags of matin; the insert mode is not inherited */
2968   mat->factortype   = matin->factortype;
2969   mat->assembled    = matin->assembled;
2970   mat->insertmode   = NOT_SET_VALUES;
2971   mat->preallocated = matin->preallocated;
2972 
       /* copy the scalar fields; the row work arrays are left unset and getrowactive is cleared */
2973   a->size         = oldmat->size;
2974   a->rank         = oldmat->rank;
2975   a->donotstash   = oldmat->donotstash;
2976   a->roworiented  = oldmat->roworiented;
2977   a->rowindices   = NULL;
2978   a->rowvalues    = NULL;
2979   a->getrowactive = PETSC_FALSE;
2980 
       /* the row and column layouts are referenced from matin */
2981   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
2982   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2983 
       /* colmap is a hash map with PETSC_USE_CTABLE, otherwise an array of which cmap->N entries are copied */
2984   if (oldmat->colmap) {
2985 #if defined(PETSC_USE_CTABLE)
2986     PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
2987 #else
2988     PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
2989     PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
2990 #endif
2991   } else a->colmap = NULL;
2992   if (oldmat->garray) {
2993     PetscInt len;
         /* garray has one entry per column of the old B block; one extra slot is allocated */
2994     len = oldmat->B->cmap->n;
2995     PetscCall(PetscMalloc1(len + 1, &a->garray));
2996     if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
2997   } else a->garray = NULL;
2998 
2999   /* It may happen MatDuplicate is called with a non-assembled matrix
3000      In fact, MatDuplicate only requires the matrix to be preallocated
3001      This may happen inside a DMCreateMatrix_Shell */
3002   if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3003   if (oldmat->Mvctx) PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
       /* duplicate both sequential blocks with the caller's option, then copy matin's composed function list */
3004   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3005   PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3006   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3007   *newmat = mat;
3008   PetscFunctionReturn(PETSC_SUCCESS);
3009 }
3010 
/*
  MatLoad_MPIAIJ - loads newMat from viewer, dispatching on the viewer type.

  A binary viewer goes to MatLoad_MPIAIJ_Binary(); an HDF5 viewer goes to MatLoad_AIJ_HDF5() when PETSC_HAVE_HDF5 is
  defined and raises PETSC_ERR_SUP otherwise; any other viewer type raises PETSC_ERR_SUP.
*/
3011 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3012 {
3013   PetscBool binary_viewer, hdf5_viewer;
3014 
3015   PetscFunctionBegin;
3016   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3017   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3018   /* setting up the viewer first makes a binary viewer read its .info file if it has not done so already */
3019   PetscCall(PetscViewerSetUp(viewer));
3020   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &binary_viewer));
3021   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &hdf5_viewer));
3022   /* reject every viewer type that is neither binary nor HDF5 before dispatching */
3023   PetscCheck(binary_viewer || hdf5_viewer, PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3024   if (binary_viewer) {
3025     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3026   } else {
3027 #if defined(PETSC_HAVE_HDF5)
3028     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3029 #else
3030     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3031 #endif
3032   }
3033   PetscFunctionReturn(PETSC_SUCCESS);
3034 }
3035 
/*
  MatLoad_MPIAIJ_Binary - reads a matrix from a binary viewer and stores it in mat with MatMPIAIJSetPreallocationCSR().

  The data is read in this order: a four-integer header (class id, global rows M, global columns N, nonzero count nz),
  the row lengths, the column indices and finally the values.

  Errors with PETSC_ERR_FILE_UNEXPECTED when the class id is not MAT_FILE_CLASSID, when M, N or nz is negative, when the
  sizes in the file differ from those already set on mat, or when the row lengths do not add up to nz.
*/
3036 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3037 {
3038   PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
3039   PetscInt    *rowidxs, *colidxs;
3040   PetscScalar *matvals;
3041 
3042   PetscFunctionBegin;
3043   PetscCall(PetscViewerSetUp(viewer));
3044 
3045   /* read in matrix header */
3046   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3047   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3048   M  = header[1];
3049   N  = header[2];
3050   nz = header[3];
3051   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3052   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3053   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");
3054 
3055   /* set block sizes from the viewer's .info file */
3056   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3057   /* set global sizes if not set already */
3058   if (mat->rmap->N < 0) mat->rmap->N = M;
3059   if (mat->cmap->N < 0) mat->cmap->N = N;
3060   PetscCall(PetscLayoutSetUp(mat->rmap));
3061   PetscCall(PetscLayoutSetUp(mat->cmap));
3062 
3063   /* check if the matrix sizes are correct */
3064   PetscCall(MatGetSize(mat, &rows, &cols));
3065   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3066 
3067   /* read in row lengths and build row indices */
3068   PetscCall(MatGetLocalSize(mat, &m, NULL));
3069   PetscCall(PetscMalloc1(m + 1, &rowidxs));
       /* the m local row lengths land in rowidxs[1..m]; the running sum below turns them into CSR row offsets */
3070   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3071   rowidxs[0] = 0;
3072   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
       /* the global sum of local nonzero counts must equal nz; the check is skipped when nz is PETSC_MAX_INT */
3073   if (nz != PETSC_MAX_INT) {
3074     PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3075     PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3076   }
3077 
3078   /* read in column indices and matrix values */
       /* rowidxs[m] is the number of local nonzeros */
3079   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3080   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3081   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3082   /* store matrix indices and values */
3083   PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
       /* the three work arrays are freed here, right after the call above returns */
3084   PetscCall(PetscFree(rowidxs));
3085   PetscCall(PetscFree2(colidxs, matvals));
3086   PetscFunctionReturn(PETSC_SUCCESS);
3087 }
3088 
3089 /* Not scalable because of ISAllGather() unless getting all columns. */
/*
  ISGetSeqIS_Private - builds a sequential index set holding all the indices of the parallel column index set iscol.

  Input Parameters:
+ mat   - the matrix; only its column ownership range, global column count and communicator are used
- iscol - the parallel column index set

  Output Parameter:
. isseq - the sequential index set; the caller owns the reference

  Notes:
  When, on every process, iscol is an ISSTRIDE with step 1 covering exactly the local column ownership range of mat,
  the result is the identity stride 0..N-1 and ISAllGather() is skipped. The step has to be checked: a stride with a
  matching first index and length but another step (for example 0 or -1) does not list the owned columns in order.
  A stride with at most one local index is accepted whatever its step, since the step then has no effect.
*/
3090 PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3091 {
3092   IS          iscol_local;
3093   PetscBool   isstride;
3094   PetscMPIInt lisstride = 0, gisstride;
3095 
3096   PetscFunctionBegin;
3097   /* check if we are grabbing all columns */
3098   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3099 
3100   if (isstride) {
3101     PetscInt start, step, len, mstart, mend;
3102     PetscCall(ISStrideGetInfo(iscol, &start, &step));
3103     PetscCall(ISGetLocalSize(iscol, &len));
3104     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mend));
3105     if (mstart == start && mend - mstart == len && (step == 1 || len <= 1)) lisstride = 1;
3106   }
3107 
       /* MPI_MIN makes the shortcut collective: it is taken only if every process qualifies */
3108   PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3109   if (gisstride) {
3110     PetscInt N;
3111     PetscCall(MatGetSize(mat, NULL, &N));
3112     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3113     PetscCall(ISSetIdentity(iscol_local));
3114     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3115   } else {
3116     PetscInt cbs;
         /* general case: gather all indices on every process and carry over the block size of iscol */
3117     PetscCall(ISGetBlockSize(iscol, &cbs));
3118     PetscCall(ISAllGather(iscol, &iscol_local));
3119     PetscCall(ISSetBlockSize(iscol_local, cbs));
3120   }
3121 
3122   *isseq = iscol_local;
3123   PetscFunctionReturn(PETSC_SUCCESS);
3124 }
3125 
3126 /*
3127  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3128  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3129 
3130  Input Parameters:
3131 +   mat - matrix
3132 .   isrow - parallel row index set; its local indices are a subset of local rows of `mat`,
3133            i.e., mat->rstart <= isrow[i] < mat->rend
3134 -   iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
3135            i.e., mat->cstart <= iscol[i] < mat->cend
3136 
3137  Output Parameters:
3138 +   isrow_d - sequential row index set for retrieving mat->A
3139 .   iscol_d - sequential  column index set for retrieving mat->A
3140 .   iscol_o - sequential column index set for retrieving mat->B
3141 -   garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`;
                the array is allocated here and handed to the caller
3142  */
3143 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
3144 {
3145   Vec             x, cmap;
3146   const PetscInt *is_idx;
3147   PetscScalar    *xarray, *cmaparray;
3148   PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
3149   Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
3150   Mat             B    = a->B;
3151   Vec             lvec = a->lvec, lcmap;
3152   PetscInt        i, cstart, cend, Bn = B->cmap->N;
3153   MPI_Comm        comm;
3154   VecScatter      Mvctx = a->Mvctx;
3155 
3156   PetscFunctionBegin;
3157   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3158   PetscCall(ISGetLocalSize(iscol, &ncols));
3159 
3160   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3161   PetscCall(MatCreateVecs(mat, &x, NULL));
3162   PetscCall(VecSet(x, -1.0));
3163   PetscCall(VecDuplicate(x, &cmap));
3164   PetscCall(VecSet(cmap, -1.0));
3165 
3166   /* Get start indices */
       /* MPI_Scan yields the inclusive prefix sum of ncols; subtracting ncols gives the count on lower ranks */
3167   PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
3168   isstart -= ncols;
3169   PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));
3170 
3171   PetscCall(ISGetIndices(iscol, &is_idx));
3172   PetscCall(VecGetArray(x, &xarray));
3173   PetscCall(VecGetArray(cmap, &cmaparray));
3174   PetscCall(PetscMalloc1(ncols, &idx));
       /* mark each selected local column in x with its own index and in cmap with its position in iscol */
3175   for (i = 0; i < ncols; i++) {
3176     xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
3177     cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
3178     idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
3179   }
3180   PetscCall(VecRestoreArray(x, &xarray));
3181   PetscCall(VecRestoreArray(cmap, &cmaparray));
3182   PetscCall(ISRestoreIndices(iscol, &is_idx));
3183 
3184   /* Get iscol_d */
       /* PETSC_OWN_POINTER: idx now belongs to iscol_d; i is reused as scratch for the block size */
3185   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
3186   PetscCall(ISGetBlockSize(iscol, &i));
3187   PetscCall(ISSetBlockSize(*iscol_d, i));
3188 
3189   /* Get isrow_d */
3190   PetscCall(ISGetLocalSize(isrow, &m));
3191   rstart = mat->rmap->rstart;
3192   PetscCall(PetscMalloc1(m, &idx));
3193   PetscCall(ISGetIndices(isrow, &is_idx));
       /* shift the row indices by the first owned row to make them local */
3194   for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
3195   PetscCall(ISRestoreIndices(isrow, &is_idx));
3196 
3197   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
3198   PetscCall(ISGetBlockSize(isrow, &i));
3199   PetscCall(ISSetBlockSize(*isrow_d, i));
3200 
3201   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
       /* x is scattered into the matrix's own lvec, cmap into a duplicate of it */
3202   PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3203   PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3204 
3205   PetscCall(VecDuplicate(lvec, &lcmap));
3206 
3207   PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3208   PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3209 
3210   /* (3) create sequential iscol_o (a subset of iscol) and garray */
3211   /* off-process column indices */
3212   count = 0;
3213   PetscCall(PetscMalloc1(Bn, &idx));
3214   PetscCall(PetscMalloc1(Bn, &cmap1));
3215 
3216   PetscCall(VecGetArray(lvec, &xarray));
3217   PetscCall(VecGetArray(lcmap, &cmaparray));
       /* entries still at -1 were selected by no process; the others are kept, in increasing order of i */
3218   for (i = 0; i < Bn; i++) {
3219     if (PetscRealPart(xarray[i]) > -1.0) {
3220       idx[count]   = i;                                     /* local column index in off-diagonal part B */
3221       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
3222       count++;
3223     }
3224   }
3225   PetscCall(VecRestoreArray(lvec, &xarray));
3226   PetscCall(VecRestoreArray(lcmap, &cmaparray));
3227 
3228   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
3229   /* cannot ensure iscol_o has same blocksize as iscol! */
3230 
       /* idx was copied by the IS and can be freed; cmap1 has room for Bn entries but only the first count are set */
3231   PetscCall(PetscFree(idx));
3232   *garray = cmap1;
3233 
3234   PetscCall(VecDestroy(&x));
3235   PetscCall(VecDestroy(&cmap));
3236   PetscCall(VecDestroy(&lcmap));
3237   PetscFunctionReturn(PETSC_SUCCESS);
3238 }
3239 
3240 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
/*
  MatCreateSubMatrix_MPIAIJ_SameRowColDist - extracts the submatrix mat[isrow, iscol].

  Input Parameters:
+ mat   - the MPIAIJ matrix
. isrow - parallel row index set
. iscol - parallel column index set
- call  - MAT_REUSE_MATRIX to refill *submat; any other value is handled as MAT_INITIAL_MATRIX

  Output Parameter:
. submat - the submatrix

  Notes:
  On the initial call the sequential index sets built by ISGetSeqIS_SameColDist_Private() are composed with the result
  under the names "isrow_d", "iscol_d" and "iscol_o". The reuse path queries them back and raises
  PETSC_ERR_ARG_WRONGSTATE if one of them is missing.

  When compacting iscol_o, every column kept by the assembled submatrix must be found among the BsubN columns that were
  selected; PETSC_ERR_ARG_WRONGSTATE is raised otherwise, without reading past the BsubN valid entries of garray.
*/
3241 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
3242 {
3243   Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
3244   Mat         M = NULL;
3245   MPI_Comm    comm;
3246   IS          iscol_d, isrow_d, iscol_o;
3247   Mat         Asub = NULL, Bsub = NULL;
3248   PetscInt    n;
3249 
3250   PetscFunctionBegin;
3251   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3252 
3253   if (call == MAT_REUSE_MATRIX) {
3254     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3255     PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
3256     PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");
3257 
3258     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
3259     PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");
3260 
3261     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
3262     PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");
3263 
3264     /* Update diagonal and off-diagonal portions of submat */
3265     asub = (Mat_MPIAIJ *)(*submat)->data;
3266     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
         /* the off-diagonal part is refreshed only when iscol_o is not empty */
3267     PetscCall(ISGetLocalSize(iscol_o, &n));
3268     if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
3269     PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
3270     PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));
3271 
3272   } else { /* call == MAT_INITIAL_MATRIX) */
3273     const PetscInt *garray;
3274     PetscInt        BsubN;
3275 
3276     /* Create isrow_d, iscol_d, iscol_o and garray */
3277     PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));
3278 
3279     /* Create local submatrices Asub and Bsub */
3280     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
3281     PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));
3282 
3283     /* Create submatrix M */
3284     PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));
3285 
3286     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3287     asub = (Mat_MPIAIJ *)M->data;
3288 
3289     PetscCall(ISGetLocalSize(iscol_o, &BsubN));
3290     n = asub->B->cmap->N;
3291     if (BsubN > n) {
3292       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3293       const PetscInt *idx;
3294       PetscInt        i, j, *idx_new, *subgarray = asub->garray;
3295       PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));
3296 
3297       PetscCall(PetscMalloc1(n, &idx_new));
3298       j = 0;
3299       PetscCall(ISGetIndices(iscol_o, &idx));
3300       for (i = 0; i < n; i++) {
3301         /* move j up to the first of the BsubN entries of garray that is >= subgarray[i]; never read past them */
3302         while (j < BsubN && subgarray[i] > garray[j]) j++;
3303         PetscCheck(j < BsubN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " was not found among the %" PetscInt_FMT " entries of garray", i, subgarray[i], BsubN);
3304         if (subgarray[i] == garray[j]) {
3305           idx_new[i] = idx[j++];
3306         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
3307       }
3308       PetscCall(ISRestoreIndices(iscol_o, &idx));
3309 
           /* replace iscol_o by the compacted set; idx_new is handed over to it */
3310       PetscCall(ISDestroy(&iscol_o));
3311       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));
3312 
3313     } else if (BsubN < n) {
3314       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
3315     }
3316 
3317     PetscCall(PetscFree(garray));
3318     *submat = M;
3319 
3320     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
         /* each local reference is dropped with ISDestroy() right after the set is composed with M */
3321     PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
3322     PetscCall(ISDestroy(&isrow_d));
3323 
3324     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
3325     PetscCall(ISDestroy(&iscol_d));
3326 
3327     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
3328     PetscCall(ISDestroy(&iscol_o));
3329   }
3330   PetscFunctionReturn(PETSC_SUCCESS);
3331 }
3332 
/*
  MatCreateSubMatrix_MPIAIJ - extracts the submatrix mat[isrow, iscol], choosing among three implementations.

  Input Parameters:
+ mat   - the MPIAIJ matrix
. isrow - parallel row index set
. iscol - parallel column index set
- call  - MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

  Output Parameter:
. newmat - the submatrix

  Notes:
  On an initial call the choice is made from the min/max of the local parts of isrow and iscol compared with the
  ownership ranges of mat, agreed on by all processes. On a reuse call it is made from the objects composed with
  *newmat by the earlier call ("isrow_d", "SubIScol" or "ISAllGather").
*/
3333 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
3334 {
3335   IS        iscol_local = NULL, isrow_d;
3336   PetscInt  csize;
3337   PetscInt  n, i, j, start, end;
3338   PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
3339   MPI_Comm  comm;
3340 
3341   PetscFunctionBegin;
3342   /* If isrow has same processor distribution as mat,
3343      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3344   if (call == MAT_REUSE_MATRIX) {
         /* tsameDist[1] is set only together with sameRowDist, and it is read only when sameRowDist is true */
3345     PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
3346     if (isrow_d) {
3347       sameRowDist  = PETSC_TRUE;
3348       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3349     } else {
3350       PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
3351       if (iscol_local) {
3352         sameRowDist  = PETSC_TRUE;
3353         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3354       }
3355     }
3356   } else {
3357     /* Check if isrow has same processor distribution as mat */
         /* an empty local part counts as matching; otherwise its min and max must lie in the owned row range */
3358     sameDist[0] = PETSC_FALSE;
3359     PetscCall(ISGetLocalSize(isrow, &n));
3360     if (!n) {
3361       sameDist[0] = PETSC_TRUE;
3362     } else {
3363       PetscCall(ISGetMinMax(isrow, &i, &j));
3364       PetscCall(MatGetOwnershipRange(mat, &start, &end));
3365       if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
3366     }
3367 
3368     /* Check if iscol has same processor distribution as mat */
3369     sameDist[1] = PETSC_FALSE;
3370     PetscCall(ISGetLocalSize(iscol, &n));
3371     if (!n) {
3372       sameDist[1] = PETSC_TRUE;
3373     } else {
3374       PetscCall(ISGetMinMax(iscol, &i, &j));
3375       PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
3376       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3377     }
3378 
         /* logical AND over all processes: each property must hold everywhere */
3379     PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3380     PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
3381     sameRowDist = tsameDist[0];
3382   }
3383 
3384   if (sameRowDist) {
3385     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3386       /* isrow and iscol have same processor distribution as mat */
3387       PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
3388       PetscFunctionReturn(PETSC_SUCCESS);
3389     } else { /* sameRowDist */
3390       /* isrow has same processor distribution as mat */
3391       if (call == MAT_INITIAL_MATRIX) {
3392         PetscBool sorted;
3393         PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3394         PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
3395         PetscCall(ISGetSize(iscol, &i));
3396         PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);
3397 
3398         PetscCall(ISSorted(iscol_local, &sorted));
3399         if (sorted) {
3400           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3401           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
3402           PetscFunctionReturn(PETSC_SUCCESS);
3403         }
             /* not sorted: fall through to the general case, which uses the iscol_local built above */
3404       } else { /* call == MAT_REUSE_MATRIX */
3405         IS iscol_sub;
3406         PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3407         if (iscol_sub) {
3408           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
3409           PetscFunctionReturn(PETSC_SUCCESS);
3410         }
3411       }
3412     }
3413   }
3414 
3415   /* General case: iscol -> iscol_local which has global size of iscol */
3416   if (call == MAT_REUSE_MATRIX) {
3417     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
3418     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3419   } else {
3420     if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3421   }
3422 
3423   PetscCall(ISGetLocalSize(iscol, &csize));
3424   PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));
3425 
       /* on an initial call iscol_local is composed with the result as "ISAllGather" and the local reference dropped */
3426   if (call == MAT_INITIAL_MATRIX) {
3427     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3428     PetscCall(ISDestroy(&iscol_local));
3429   }
3430   PetscFunctionReturn(PETSC_SUCCESS);
3431 }
3432 
3433 /*@C
3434   MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3435   and "off-diagonal" part of the matrix in CSR format.
3436 
3437   Collective
3438 
3439   Input Parameters:
3440 + comm   - MPI communicator
3441 . A      - "diagonal" portion of matrix
3442 . B      - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3443 - garray - global index of `B` columns
3444 
3445   Output Parameter:
3446 . mat - the matrix, with input `A` as its local diagonal matrix
3447 
3448   Level: advanced
3449 
3450   Notes:
3451   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3452 
3453   `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore.
3454 
3455 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3456 @*/
3457 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
3458 {
3459   Mat_MPIAIJ        *maij;
3460   Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
3461   PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
3462   const PetscScalar *oa;
3463   Mat                Bnew;
3464   PetscInt           m, n, N;
3465   MatType            mpi_mat_type;
3466 
3467   PetscFunctionBegin;
       /* the output matrix is created before the consistency checks on A and B are run */
3468   PetscCall(MatCreate(comm, mat));
3469   PetscCall(MatGetSize(A, &m, &n));
3470   PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
3471   PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
3472   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3473   /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */
3474 
3475   /* Get global columns of mat */
       /* N is the sum over all processes of the column count of A */
3476   PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));
3477 
3478   PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
3479   /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
3480   PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
3481   PetscCall(MatSetType(*mat, mpi_mat_type));
3482 
3483   PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
3484   maij = (Mat_MPIAIJ *)(*mat)->data;
3485 
3486   (*mat)->preallocated = PETSC_TRUE;
3487 
3488   PetscCall(PetscLayoutSetUp((*mat)->rmap));
3489   PetscCall(PetscLayoutSetUp((*mat)->cmap));
3490 
3491   /* Set A as diagonal portion of *mat */
3492   maij->A = A;
3493 
       /* overwrite, in place, every column index stored in B (b->j) with its global index taken from garray;
          oi[m] (the last entry of b->i) is used as the number of stored entries */
3494   nz = oi[m];
3495   for (i = 0; i < nz; i++) {
3496     col   = oj[i];
3497     oj[i] = garray[col];
3498   }
3499 
3500   /* Set Bnew as off-diagonal portion of *mat */
       /* Bnew has N columns and is built directly on the i, j and value arrays of B; nothing is copied here */
3501   PetscCall(MatSeqAIJGetArrayRead(B, &oa));
3502   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
3503   PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
3504   bnew        = (Mat_SeqAIJ *)Bnew->data;
3505   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3506   maij->B     = Bnew;
3507 
3508   PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);
3509 
       /* clear B's ownership flags before destroying it so that the shared arrays are not freed with B */
3510   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3511   b->free_a       = PETSC_FALSE;
3512   b->free_ij      = PETSC_FALSE;
3513   PetscCall(MatDestroy(&B));
3514 
3515   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3516   bnew->free_a       = PETSC_TRUE;
3517   bnew->free_ij      = PETSC_TRUE;
3518 
3519   /* condense columns of maij->B */
       /* MAT_NO_OFF_PROC_ENTRIES is switched on only around this assembly and switched off again afterwards */
3520   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3521   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
3522   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
3523   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
3524   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3525   PetscFunctionReturn(PETSC_SUCCESS);
3526 }
3527 
/* Prototype of a routine called by MatCreateSubMatrix_MPIAIJ_SameRowDist(). NOTE(review): presumably defined in another source file - confirm. */
3528 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3529 
3530 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
3531 {
3532   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
3533   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3534   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
3535   Mat             M, Msub, B = a->B;
3536   MatScalar      *aa;
3537   Mat_SeqAIJ     *aij;
3538   PetscInt       *garray = a->garray, *colsub, Ncols;
3539   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
3540   IS              iscol_sub, iscmap;
3541   const PetscInt *is_idx, *cmap;
3542   PetscBool       allcolumns = PETSC_FALSE;
3543   MPI_Comm        comm;
3544 
3545   PetscFunctionBegin;
3546   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3547   if (call == MAT_REUSE_MATRIX) {
3548     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3549     PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
3550     PetscCall(ISGetLocalSize(iscol_sub, &count));
3551 
3552     PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
3553     PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");
3554 
3555     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
3556     PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3557 
3558     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
3559 
3560   } else { /* call == MAT_INITIAL_MATRIX) */
3561     PetscBool flg;
3562 
3563     PetscCall(ISGetLocalSize(iscol, &n));
3564     PetscCall(ISGetSize(iscol, &Ncols));
3565 
3566     /* (1) iscol -> nonscalable iscol_local */
3567     /* Check for special case: each processor gets entire matrix columns */
3568     PetscCall(ISIdentity(iscol_local, &flg));
3569     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3570     PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3571     if (allcolumns) {
3572       iscol_sub = iscol_local;
3573       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3574       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
3575 
3576     } else {
3577       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3578       PetscInt *idx, *cmap1, k;
3579       PetscCall(PetscMalloc1(Ncols, &idx));
3580       PetscCall(PetscMalloc1(Ncols, &cmap1));
3581       PetscCall(ISGetIndices(iscol_local, &is_idx));
3582       count = 0;
3583       k     = 0;
3584       for (i = 0; i < Ncols; i++) {
3585         j = is_idx[i];
3586         if (j >= cstart && j < cend) {
3587           /* diagonal part of mat */
3588           idx[count]     = j;
3589           cmap1[count++] = i; /* column index in submat */
3590         } else if (Bn) {
3591           /* off-diagonal part of mat */
3592           if (j == garray[k]) {
3593             idx[count]     = j;
3594             cmap1[count++] = i; /* column index in submat */
3595           } else if (j > garray[k]) {
3596             while (j > garray[k] && k < Bn - 1) k++;
3597             if (j == garray[k]) {
3598               idx[count]     = j;
3599               cmap1[count++] = i; /* column index in submat */
3600             }
3601           }
3602         }
3603       }
3604       PetscCall(ISRestoreIndices(iscol_local, &is_idx));
3605 
3606       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
3607       PetscCall(ISGetBlockSize(iscol, &cbs));
3608       PetscCall(ISSetBlockSize(iscol_sub, cbs));
3609 
3610       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
3611     }
3612 
3613     /* (3) Create sequential Msub */
3614     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
3615   }
3616 
3617   PetscCall(ISGetLocalSize(iscol_sub, &count));
3618   aij = (Mat_SeqAIJ *)(Msub)->data;
3619   ii  = aij->i;
3620   PetscCall(ISGetIndices(iscmap, &cmap));
3621 
3622   /*
3623       m - number of local rows
3624       Ncols - number of columns (same on all processors)
3625       rstart - first row in new global matrix generated
3626   */
3627   PetscCall(MatGetSize(Msub, &m, NULL));
3628 
3629   if (call == MAT_INITIAL_MATRIX) {
3630     /* (4) Create parallel newmat */
3631     PetscMPIInt rank, size;
3632     PetscInt    csize;
3633 
3634     PetscCallMPI(MPI_Comm_size(comm, &size));
3635     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3636 
3637     /*
3638         Determine the number of non-zeros in the diagonal and off-diagonal
3639         portions of the matrix in order to do correct preallocation
3640     */
3641 
3642     /* first get start and end of "diagonal" columns */
3643     PetscCall(ISGetLocalSize(iscol, &csize));
3644     if (csize == PETSC_DECIDE) {
3645       PetscCall(ISGetSize(isrow, &mglobal));
3646       if (mglobal == Ncols) { /* square matrix */
3647         nlocal = m;
3648       } else {
3649         nlocal = Ncols / size + ((Ncols % size) > rank);
3650       }
3651     } else {
3652       nlocal = csize;
3653     }
3654     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3655     rstart = rend - nlocal;
3656     PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);
3657 
3658     /* next, compute all the lengths */
3659     jj = aij->j;
3660     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3661     olens = dlens + m;
3662     for (i = 0; i < m; i++) {
3663       jend = ii[i + 1] - ii[i];
3664       olen = 0;
3665       dlen = 0;
3666       for (j = 0; j < jend; j++) {
3667         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3668         else dlen++;
3669         jj++;
3670       }
3671       olens[i] = olen;
3672       dlens[i] = dlen;
3673     }
3674 
3675     PetscCall(ISGetBlockSize(isrow, &bs));
3676     PetscCall(ISGetBlockSize(iscol, &cbs));
3677 
3678     PetscCall(MatCreate(comm, &M));
3679     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
3680     PetscCall(MatSetBlockSizes(M, bs, cbs));
3681     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3682     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3683     PetscCall(PetscFree(dlens));
3684 
3685   } else { /* call == MAT_REUSE_MATRIX */
3686     M = *newmat;
3687     PetscCall(MatGetLocalSize(M, &i, NULL));
3688     PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3689     PetscCall(MatZeroEntries(M));
3690     /*
3691          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3692        rather than the slower MatSetValues().
3693     */
3694     M->was_assembled = PETSC_TRUE;
3695     M->assembled     = PETSC_FALSE;
3696   }
3697 
3698   /* (5) Set values of Msub to *newmat */
3699   PetscCall(PetscMalloc1(count, &colsub));
3700   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
3701 
3702   jj = aij->j;
3703   PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
3704   for (i = 0; i < m; i++) {
3705     row = rstart + i;
3706     nz  = ii[i + 1] - ii[i];
3707     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
3708     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
3709     jj += nz;
3710     aa += nz;
3711   }
3712   PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
3713   PetscCall(ISRestoreIndices(iscmap, &cmap));
3714 
3715   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3716   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3717 
3718   PetscCall(PetscFree(colsub));
3719 
3720   /* save Msub, iscol_sub and iscmap used in processor for next request */
3721   if (call == MAT_INITIAL_MATRIX) {
3722     *newmat = M;
3723     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub));
3724     PetscCall(MatDestroy(&Msub));
3725 
3726     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub));
3727     PetscCall(ISDestroy(&iscol_sub));
3728 
3729     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap));
3730     PetscCall(ISDestroy(&iscmap));
3731 
3732     if (iscol_local) {
3733       PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local));
3734       PetscCall(ISDestroy(&iscol_local));
3735     }
3736   }
3737   PetscFunctionReturn(PETSC_SUCCESS);
3738 }
3739 
3740 /*
3741     Not great since it makes two copies of the submatrix, first an SeqAIJ
3742   in local and then by concatenating the local matrices the end result.
3743   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3744 
3745   This requires a sequential iscol with all indices.
3746 */
3747 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
3748 {
3749   PetscMPIInt rank, size;
3750   PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
3751   PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3752   Mat         M, Mreuse;
3753   MatScalar  *aa, *vwork;
3754   MPI_Comm    comm;
3755   Mat_SeqAIJ *aij;
3756   PetscBool   colflag, allcolumns = PETSC_FALSE;
3757 
3758   PetscFunctionBegin;
3759   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3760   PetscCallMPI(MPI_Comm_rank(comm, &rank));
3761   PetscCallMPI(MPI_Comm_size(comm, &size));
3762 
3763   /* Check for special case: each processor gets entire matrix columns */
3764   PetscCall(ISIdentity(iscol, &colflag));
3765   PetscCall(ISGetLocalSize(iscol, &n));
3766   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3767   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3768 
3769   if (call == MAT_REUSE_MATRIX) {
3770     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
3771     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3772     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
3773   } else {
3774     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
3775   }
3776 
3777   /*
3778       m - number of local rows
3779       n - number of columns (same on all processors)
3780       rstart - first row in new global matrix generated
3781   */
3782   PetscCall(MatGetSize(Mreuse, &m, &n));
3783   PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
3784   if (call == MAT_INITIAL_MATRIX) {
3785     aij = (Mat_SeqAIJ *)(Mreuse)->data;
3786     ii  = aij->i;
3787     jj  = aij->j;
3788 
3789     /*
3790         Determine the number of non-zeros in the diagonal and off-diagonal
3791         portions of the matrix in order to do correct preallocation
3792     */
3793 
3794     /* first get start and end of "diagonal" columns */
3795     if (csize == PETSC_DECIDE) {
3796       PetscCall(ISGetSize(isrow, &mglobal));
3797       if (mglobal == n) { /* square matrix */
3798         nlocal = m;
3799       } else {
3800         nlocal = n / size + ((n % size) > rank);
3801       }
3802     } else {
3803       nlocal = csize;
3804     }
3805     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3806     rstart = rend - nlocal;
3807     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
3808 
3809     /* next, compute all the lengths */
3810     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3811     olens = dlens + m;
3812     for (i = 0; i < m; i++) {
3813       jend = ii[i + 1] - ii[i];
3814       olen = 0;
3815       dlen = 0;
3816       for (j = 0; j < jend; j++) {
3817         if (*jj < rstart || *jj >= rend) olen++;
3818         else dlen++;
3819         jj++;
3820       }
3821       olens[i] = olen;
3822       dlens[i] = dlen;
3823     }
3824     PetscCall(MatCreate(comm, &M));
3825     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
3826     PetscCall(MatSetBlockSizes(M, bs, cbs));
3827     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3828     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3829     PetscCall(PetscFree(dlens));
3830   } else {
3831     PetscInt ml, nl;
3832 
3833     M = *newmat;
3834     PetscCall(MatGetLocalSize(M, &ml, &nl));
3835     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3836     PetscCall(MatZeroEntries(M));
3837     /*
3838          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3839        rather than the slower MatSetValues().
3840     */
3841     M->was_assembled = PETSC_TRUE;
3842     M->assembled     = PETSC_FALSE;
3843   }
3844   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
3845   aij = (Mat_SeqAIJ *)(Mreuse)->data;
3846   ii  = aij->i;
3847   jj  = aij->j;
3848 
3849   /* trigger copy to CPU if needed */
3850   PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
3851   for (i = 0; i < m; i++) {
3852     row   = rstart + i;
3853     nz    = ii[i + 1] - ii[i];
3854     cwork = jj;
3855     jj += nz;
3856     vwork = aa;
3857     aa += nz;
3858     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
3859   }
3860   PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));
3861 
3862   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3863   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3864   *newmat = M;
3865 
3866   /* save submatrix used in processor for next request */
3867   if (call == MAT_INITIAL_MATRIX) {
3868     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
3869     PetscCall(MatDestroy(&Mreuse));
3870   }
3871   PetscFunctionReturn(PETSC_SUCCESS);
3872 }
3873 
3874 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3875 {
3876   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3877   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
3878   const PetscInt *JJ;
3879   PetscBool       nooffprocentries;
3880   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3881 
3882   PetscFunctionBegin;
3883   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);
3884 
3885   PetscCall(PetscLayoutSetUp(B->rmap));
3886   PetscCall(PetscLayoutSetUp(B->cmap));
3887   m      = B->rmap->n;
3888   cstart = B->cmap->rstart;
3889   cend   = B->cmap->rend;
3890   rstart = B->rmap->rstart;
3891 
3892   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3893 
3894   if (PetscDefined(USE_DEBUG)) {
3895     for (i = 0; i < m; i++) {
3896       nnz = Ii[i + 1] - Ii[i];
3897       JJ  = J + Ii[i];
3898       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3899       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3900       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3901     }
3902   }
3903 
3904   for (i = 0; i < m; i++) {
3905     nnz     = Ii[i + 1] - Ii[i];
3906     JJ      = J + Ii[i];
3907     nnz_max = PetscMax(nnz_max, nnz);
3908     d       = 0;
3909     for (j = 0; j < nnz; j++) {
3910       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3911     }
3912     d_nnz[i] = d;
3913     o_nnz[i] = nnz - d;
3914   }
3915   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3916   PetscCall(PetscFree2(d_nnz, o_nnz));
3917 
3918   for (i = 0; i < m; i++) {
3919     ii = i + rstart;
3920     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J + Ii[i], v ? v + Ii[i] : NULL, INSERT_VALUES));
3921   }
3922   nooffprocentries    = B->nooffprocentries;
3923   B->nooffprocentries = PETSC_TRUE;
3924   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3925   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3926   B->nooffprocentries = nooffprocentries;
3927 
3928   /* count number of entries below block diagonal */
3929   PetscCall(PetscFree(Aij->ld));
3930   PetscCall(PetscCalloc1(m, &ld));
3931   Aij->ld = ld;
3932   for (i = 0; i < m; i++) {
3933     nnz = Ii[i + 1] - Ii[i];
3934     j   = 0;
3935     while (j < nnz && J[j] < cstart) j++;
3936     ld[i] = j;
3937     J += nnz;
3938   }
3939 
3940   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3941   PetscFunctionReturn(PETSC_SUCCESS);
3942 }
3943 
3944 /*@
3945   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3946   (the default parallel PETSc format).
3947 
3948   Collective
3949 
3950   Input Parameters:
3951 + B - the matrix
3952 . i - the indices into j for the start of each local row (starts with zero)
3953 . j - the column indices for each local row (starts with zero)
3954 - v - optional values in the matrix
3955 
3956   Level: developer
3957 
3958   Notes:
3959   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
3960   thus you CANNOT change the matrix entries by changing the values of `v` after you have
3961   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3962 
3963   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
3964 
3965   The format which is used for the sparse matrix input, is equivalent to a
3966   row-major ordering.. i.e for the following matrix, the input data expected is
3967   as shown
3968 
3969 .vb
3970         1 0 0
3971         2 0 3     P0
3972        -------
3973         4 5 6     P1
3974 
3975      Process0 [P0] rows_owned=[0,1]
3976         i =  {0,1,3}  [size = nrow+1  = 2+1]
3977         j =  {0,0,2}  [size = 3]
3978         v =  {1,2,3}  [size = 3]
3979 
3980      Process1 [P1] rows_owned=[2]
3981         i =  {0,3}    [size = nrow+1  = 1+1]
3982         j =  {0,1,2}  [size = 3]
3983         v =  {4,5,6}  [size = 3]
3984 .ve
3985 
3986 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
3987           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
3988 @*/
3989 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
3990 {
3991   PetscFunctionBegin;
3992   PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
3993   PetscFunctionReturn(PETSC_SUCCESS);
3994 }
3995 
3996 /*@C
3997   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
3998   (the default parallel PETSc format).  For good matrix assembly performance
3999   the user should preallocate the matrix storage by setting the parameters
4000   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4001 
4002   Collective
4003 
4004   Input Parameters:
4005 + B     - the matrix
4006 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4007            (same value is used for all local rows)
4008 . d_nnz - array containing the number of nonzeros in the various rows of the
4009            DIAGONAL portion of the local submatrix (possibly different for each row)
4010            or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4011            The size of this array is equal to the number of local rows, i.e 'm'.
4012            For matrices that will be factored, you must leave room for (and set)
4013            the diagonal entry even if it is zero.
4014 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4015            submatrix (same value is used for all local rows).
4016 - o_nnz - array containing the number of nonzeros in the various rows of the
4017            OFF-DIAGONAL portion of the local submatrix (possibly different for
4018            each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4019            structure. The size of this array is equal to the number
4020            of local rows, i.e 'm'.
4021 
4022   Usage:
4023   Consider the following 8x8 matrix with 34 non-zero values, that is
4024   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4025   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4026   as follows
4027 
4028 .vb
4029             1  2  0  |  0  3  0  |  0  4
4030     Proc0   0  5  6  |  7  0  0  |  8  0
4031             9  0 10  | 11  0  0  | 12  0
4032     -------------------------------------
4033            13  0 14  | 15 16 17  |  0  0
4034     Proc1   0 18  0  | 19 20 21  |  0  0
4035             0  0  0  | 22 23  0  | 24  0
4036     -------------------------------------
4037     Proc2  25 26 27  |  0  0 28  | 29  0
4038            30  0  0  | 31 32 33  |  0 34
4039 .ve
4040 
4041   This can be represented as a collection of submatrices as
4042 .vb
4043       A B C
4044       D E F
4045       G H I
4046 .ve
4047 
4048   Where the submatrices A,B,C are owned by proc0, D,E,F are
4049   owned by proc1, G,H,I are owned by proc2.
4050 
4051   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4052   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4053   The 'M','N' parameters are 8,8, and have the same values on all procs.
4054 
4055   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4056   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4057   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4058   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4059   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4060   matrix, ans [DF] as another `MATSEQAIJ` matrix.
4061 
4062   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4063   allocated for every row of the local diagonal submatrix, and `o_nz`
4064   storage locations are allocated for every row of the OFF-DIAGONAL submat.
4065   One way to choose `d_nz` and `o_nz` is to use the max nonzerors per local
4066   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4067   In this case, the values of `d_nz`, `o_nz` are
4068 .vb
4069      proc0  dnz = 2, o_nz = 2
4070      proc1  dnz = 3, o_nz = 2
4071      proc2  dnz = 1, o_nz = 4
4072 .ve
4073   We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
4074   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4075   for proc3. i.e we are using 12+15+10=37 storage locations to store
4076   34 values.
4077 
4078   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4079   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4080   In the above case the values for `d_nnz`, `o_nnz` are
4081 .vb
4082      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4083      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4084      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4085 .ve
4086   Here the space allocated is sum of all the above values i.e 34, and
4087   hence pre-allocation is perfect.
4088 
4089   Level: intermediate
4090 
4091   Notes:
4092   If the *_nnz parameter is given then the *_nz parameter is ignored
4093 
4094   The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4095   storage.  The stored row and column indices begin with zero.
4096   See [Sparse Matrices](sec_matsparse) for details.
4097 
4098   The parallel matrix is partitioned such that the first m0 rows belong to
4099   process 0, the next m1 rows belong to process 1, the next m2 rows belong
4100   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4101 
4102   The DIAGONAL portion of the local submatrix of a processor can be defined
4103   as the submatrix which is obtained by extraction the part corresponding to
4104   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4105   first row that belongs to the processor, r2 is the last row belonging to
4106   the this processor, and c1-c2 is range of indices of the local part of a
4107   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4108   common case of a square matrix, the row and column ranges are the same and
4109   the DIAGONAL part is also square. The remaining portion of the local
4110   submatrix (mxN) constitute the OFF-DIAGONAL portion.
4111 
4112   If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4113 
4114   You can call `MatGetInfo()` to get information on how effective the preallocation was;
4115   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4116   You can also run with the option `-info` and look for messages with the string
4117   malloc in them to see if additional memory allocation was needed.
4118 
4119 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4120           `MatGetInfo()`, `PetscSplitOwnership()`
4121 @*/
4122 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
4123 {
4124   PetscFunctionBegin;
4125   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
4126   PetscValidType(B, 1);
4127   PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
4128   PetscFunctionReturn(PETSC_SUCCESS);
4129 }
4130 
4131 /*@
4132   MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard
4133   CSR format for the local rows.
4134 
4135   Collective
4136 
4137   Input Parameters:
4138 + comm - MPI communicator
4139 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
4140 . n    - This value should be the same as the local size used in creating the
4141        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4142        calculated if N is given) For square matrices n is almost always m.
4143 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4144 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4145 . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4146 . j    - column indices
4147 - a    - optional matrix values
4148 
4149   Output Parameter:
4150 . mat - the matrix
4151 
4152   Level: intermediate
4153 
4154   Notes:
4155   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4156   thus you CANNOT change the matrix entries by changing the values of a[] after you have
4157   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4158 
4159   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
4160 
4161   The format which is used for the sparse matrix input, is equivalent to a
4162   row-major ordering.. i.e for the following matrix, the input data expected is
4163   as shown
4164 
4165   Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
4166 .vb
4167         1 0 0
4168         2 0 3     P0
4169        -------
4170         4 5 6     P1
4171 
4172      Process0 [P0] rows_owned=[0,1]
4173         i =  {0,1,3}  [size = nrow+1  = 2+1]
4174         j =  {0,0,2}  [size = 3]
4175         v =  {1,2,3}  [size = 3]
4176 
4177      Process1 [P1] rows_owned=[2]
4178         i =  {0,3}    [size = nrow+1  = 1+1]
4179         j =  {0,1,2}  [size = 3]
4180         v =  {4,5,6}  [size = 3]
4181 .ve
4182 
4183 .seealso: [](ch_matrices), `Mat`, `MATMPIAIK`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4184           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4185 @*/
4186 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
4187 {
4188   PetscFunctionBegin;
4189   PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4190   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4191   PetscCall(MatCreate(comm, mat));
4192   PetscCall(MatSetSizes(*mat, m, n, M, N));
4193   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4194   PetscCall(MatSetType(*mat, MATMPIAIJ));
4195   PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
4196   PetscFunctionReturn(PETSC_SUCCESS);
4197 }
4198 
4199 /*@
4200   MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard
4201   CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed
4202   from `MatCreateMPIAIJWithArrays()`
4203 
4204   Deprecated: Use `MatUpdateMPIAIJWithArray()`
4205 
4206   Collective
4207 
4208   Input Parameters:
4209 + mat - the matrix
4210 . m   - number of local rows (Cannot be `PETSC_DECIDE`)
4211 . n   - This value should be the same as the local size used in creating the
4212        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4213        calculated if N is given) For square matrices n is almost always m.
4214 . M   - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4215 . N   - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4216 . Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4217 . J   - column indices
4218 - v   - matrix values
4219 
4220   Level: deprecated
4221 
4222 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4223           `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()`
4224 @*/
4225 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
4226 {
4227   PetscInt        nnz, i;
4228   PetscBool       nooffprocentries;
4229   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4230   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4231   PetscScalar    *ad, *ao;
4232   PetscInt        ldi, Iii, md;
4233   const PetscInt *Adi = Ad->i;
4234   PetscInt       *ld  = Aij->ld;
4235 
4236   PetscFunctionBegin;
4237   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4238   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4239   PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4240   PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4241 
4242   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4243   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4244 
4245   for (i = 0; i < m; i++) {
4246     nnz = Ii[i + 1] - Ii[i];
4247     Iii = Ii[i];
4248     ldi = ld[i];
4249     md  = Adi[i + 1] - Adi[i];
4250     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4251     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4252     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4253     ad += md;
4254     ao += nnz - md;
4255   }
4256   nooffprocentries      = mat->nooffprocentries;
4257   mat->nooffprocentries = PETSC_TRUE;
4258   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4259   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4260   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4261   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4262   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4263   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4264   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4265   mat->nooffprocentries = nooffprocentries;
4266   PetscFunctionReturn(PETSC_SUCCESS);
4267 }
4268 
4269 /*@
4270   MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values
4271 
4272   Collective
4273 
4274   Input Parameters:
4275 + mat - the matrix
4276 - v   - matrix values, stored by row
4277 
4278   Level: intermediate
4279 
4280   Note:
4281   The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4282 
4283 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4284           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4285 @*/
4286 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4287 {
4288   PetscInt        nnz, i, m;
4289   PetscBool       nooffprocentries;
4290   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4291   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4292   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4293   PetscScalar    *ad, *ao;
4294   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4295   PetscInt        ldi, Iii, md;
4296   PetscInt       *ld = Aij->ld;
4297 
4298   PetscFunctionBegin;
4299   m = mat->rmap->n;
4300 
4301   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4302   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4303   Iii = 0;
4304   for (i = 0; i < m; i++) {
4305     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4306     ldi = ld[i];
4307     md  = Adi[i + 1] - Adi[i];
4308     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4309     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4310     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4311     ad += md;
4312     ao += nnz - md;
4313     Iii += nnz;
4314   }
4315   nooffprocentries      = mat->nooffprocentries;
4316   mat->nooffprocentries = PETSC_TRUE;
4317   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4318   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4319   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4320   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4321   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4322   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4323   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4324   mat->nooffprocentries = nooffprocentries;
4325   PetscFunctionReturn(PETSC_SUCCESS);
4326 }
4327 
4328 /*@C
4329   MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4330   (the default parallel PETSc format).  For good matrix assembly performance
4331   the user should preallocate the matrix storage by setting the parameters
4332   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4333 
4334   Collective
4335 
4336   Input Parameters:
4337 + comm  - MPI communicator
4338 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4339            This value should be the same as the local size used in creating the
4340            y vector for the matrix-vector product y = Ax.
4341 . n     - This value should be the same as the local size used in creating the
4342        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4343        calculated if N is given) For square matrices n is almost always m.
4344 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4345 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4346 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4347            (same value is used for all local rows)
4348 . d_nnz - array containing the number of nonzeros in the various rows of the
4349            DIAGONAL portion of the local submatrix (possibly different for each row)
4350            or `NULL`, if `d_nz` is used to specify the nonzero structure.
4351            The size of this array is equal to the number of local rows, i.e 'm'.
4352 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4353            submatrix (same value is used for all local rows).
4354 - o_nnz - array containing the number of nonzeros in the various rows of the
4355            OFF-DIAGONAL portion of the local submatrix (possibly different for
4356            each row) or `NULL`, if `o_nz` is used to specify the nonzero
4357            structure. The size of this array is equal to the number
4358            of local rows, i.e 'm'.
4359 
4360   Output Parameter:
4361 . A - the matrix
4362 
4363   Options Database Keys:
4364 + -mat_no_inode                     - Do not use inodes
4365 . -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
4366 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4367         See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
4368         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4369 
4370   Level: intermediate
4371 
4372   Notes:
4373   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4374   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4375   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4376 
4377   If the *_nnz parameter is given then the *_nz parameter is ignored
4378 
4379   The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4380   processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4381   storage requirements for this matrix.
4382 
4383   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one
4384   processor then it must be used on all processors that share the object for
4385   that argument.
4386 
4387   The user MUST specify either the local or global matrix dimensions
4388   (possibly both).
4389 
4390   The parallel matrix is partitioned across processors such that the
4391   first m0 rows belong to process 0, the next m1 rows belong to
4392   process 1, the next m2 rows belong to process 2 etc.. where
4393   m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4394   values corresponding to [m x N] submatrix.
4395 
4396   The columns are logically partitioned with the n0 columns belonging
4397   to 0th partition, the next n1 columns belonging to the next
4398   partition etc.. where n0,n1,n2... are the input parameter 'n'.
4399 
4400   The DIAGONAL portion of the local submatrix on any given processor
4401   is the submatrix corresponding to the rows and columns m,n
4402   corresponding to the given processor. i.e diagonal matrix on
4403   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4404   etc. The remaining portion of the local submatrix [m x (N-n)]
4405   constitutes the OFF-DIAGONAL portion. The example below better
4406   illustrates this concept.
4407 
4408   For a square global matrix we define each processor's diagonal portion
4409   to be its local rows and the corresponding columns (a square submatrix);
4410   each processor's off-diagonal portion encompasses the remainder of the
4411   local matrix (a rectangular submatrix).
4412 
4413   If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
4414 
4415   When calling this routine with a single process communicator, a matrix of
4416   type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4417   type of communicator, use the construction mechanism
4418 .vb
4419      MatCreate(...,&A);
4420      MatSetType(A,MATMPIAIJ);
4421      MatSetSizes(A, m,n,M,N);
4422      MatMPIAIJSetPreallocation(A,...);
4423 .ve
4424 
4425   By default, this format uses inodes (identical nodes) when possible.
4426   We search for consecutive rows with the same nonzero structure, thereby
4427   reusing matrix information to achieve increased efficiency.
4428 
4429   Usage:
4430   Consider the following 8x8 matrix with 34 non-zero values, that is
4431   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4432   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4433   as follows
4434 
4435 .vb
4436             1  2  0  |  0  3  0  |  0  4
4437     Proc0   0  5  6  |  7  0  0  |  8  0
4438             9  0 10  | 11  0  0  | 12  0
4439     -------------------------------------
4440            13  0 14  | 15 16 17  |  0  0
4441     Proc1   0 18  0  | 19 20 21  |  0  0
4442             0  0  0  | 22 23  0  | 24  0
4443     -------------------------------------
4444     Proc2  25 26 27  |  0  0 28  | 29  0
4445            30  0  0  | 31 32 33  |  0 34
4446 .ve
4447 
4448   This can be represented as a collection of submatrices as
4449 
4450 .vb
4451       A B C
4452       D E F
4453       G H I
4454 .ve
4455 
4456   Where the submatrices A,B,C are owned by proc0, D,E,F are
4457   owned by proc1, G,H,I are owned by proc2.
4458 
4459   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4460   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4461   The 'M','N' parameters are 8,8, and have the same values on all procs.
4462 
4463   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4464   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4465   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4466   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4467   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4468   matrix, and [DF] as another `MATSEQAIJ` matrix.
4469 
4470   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4471   allocated for every row of the local diagonal submatrix, and `o_nz`
4472   storage locations are allocated for every row of the OFF-DIAGONAL submat.
4473   One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
4474   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4475   In this case, the values of `d_nz`,`o_nz` are
4476 .vb
4477      proc0  d_nz = 2, o_nz = 2
4478      proc1  d_nz = 3, o_nz = 2
4479      proc2  d_nz = 1, o_nz = 4
4480 .ve
4481   We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
4482   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4483   for proc2. i.e we are using 12+15+10=37 storage locations to store
4484   34 values.
4485 
4486   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4487   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4488   In the above case the values for d_nnz,o_nnz are
4489 .vb
4490      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4491      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4492      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4493 .ve
4494   Here the space allocated is sum of all the above values i.e 34, and
4495   hence pre-allocation is perfect.
4496 
4497 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4498           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
4499 @*/
4500 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4501 {
4502   PetscMPIInt size;
4503 
4504   PetscFunctionBegin;
4505   PetscCall(MatCreate(comm, A));
4506   PetscCall(MatSetSizes(*A, m, n, M, N));
4507   PetscCallMPI(MPI_Comm_size(comm, &size));
4508   if (size > 1) {
4509     PetscCall(MatSetType(*A, MATMPIAIJ));
4510     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4511   } else {
4512     PetscCall(MatSetType(*A, MATSEQAIJ));
4513     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4514   }
4515   PetscFunctionReturn(PETSC_SUCCESS);
4516 }
4517 
4518 /*MC
4519     MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix
4520 
4521     Synopsis:
4522     MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4523 
4524     Not Collective
4525 
4526     Input Parameter:
4527 .   A - the `MATMPIAIJ` matrix
4528 
4529     Output Parameters:
4530 +   Ad - the diagonal portion of the matrix
4531 .   Ao - the off diagonal portion of the matrix
4532 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4533 -   ierr - error code
4534 
4535      Level: advanced
4536 
4537     Note:
4538     Use  `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4539 
4540 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
4541 M*/
4542 
4543 /*MC
4544     MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4545 
4546     Synopsis:
4547     MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4548 
4549     Not Collective
4550 
4551     Input Parameters:
4552 +   A - the `MATMPIAIJ` matrix
4553 .   Ad - the diagonal portion of the matrix
4554 .   Ao - the off diagonal portion of the matrix
4555 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4556 -   ierr - error code
4557 
4558      Level: advanced
4559 
4560 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
4561 M*/
4562 
4563 /*@C
4564   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4565 
4566   Not Collective
4567 
4568   Input Parameter:
4569 . A - The `MATMPIAIJ` matrix
4570 
4571   Output Parameters:
4572 + Ad     - The local diagonal block as a `MATSEQAIJ` matrix
4573 . Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
4574 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4575 
4576   Level: intermediate
4577 
4578   Note:
4579   The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
4580   in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns of `Ao` are in [0, Nco), where Nco is
4581   the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
4582   local column numbers to global column numbers in the original matrix.
4583 
4584   Fortran Notes:
4585   `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()`
4586 
4587 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
4588 @*/
4589 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4590 {
4591   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4592   PetscBool   flg;
4593 
4594   PetscFunctionBegin;
4595   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4596   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4597   if (Ad) *Ad = a->A;
4598   if (Ao) *Ao = a->B;
4599   if (colmap) *colmap = a->garray;
4600   PetscFunctionReturn(PETSC_SUCCESS);
4601 }
4602 
4603 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
4604 {
4605   PetscInt     m, N, i, rstart, nnz, Ii;
4606   PetscInt    *indx;
4607   PetscScalar *values;
4608   MatType      rootType;
4609 
4610   PetscFunctionBegin;
4611   PetscCall(MatGetSize(inmat, &m, &N));
4612   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4613     PetscInt *dnz, *onz, sum, bs, cbs;
4614 
4615     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
4616     /* Check sum(n) = N */
4617     PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
4618     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
4619 
4620     PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
4621     rstart -= m;
4622 
4623     MatPreallocateBegin(comm, m, n, dnz, onz);
4624     for (i = 0; i < m; i++) {
4625       PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4626       PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
4627       PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4628     }
4629 
4630     PetscCall(MatCreate(comm, outmat));
4631     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
4632     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
4633     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
4634     PetscCall(MatGetRootType_Private(inmat, &rootType));
4635     PetscCall(MatSetType(*outmat, rootType));
4636     PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
4637     PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
4638     MatPreallocateEnd(dnz, onz);
4639     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
4640   }
4641 
4642   /* numeric phase */
4643   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
4644   for (i = 0; i < m; i++) {
4645     PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4646     Ii = i + rstart;
4647     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
4648     PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4649   }
4650   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
4651   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
4652   PetscFunctionReturn(PETSC_SUCCESS);
4653 }
4654 
4655 PetscErrorCode MatFileSplit(Mat A, char *outfile)
4656 {
4657   PetscMPIInt        rank;
4658   PetscInt           m, N, i, rstart, nnz;
4659   size_t             len;
4660   const PetscInt    *indx;
4661   PetscViewer        out;
4662   char              *name;
4663   Mat                B;
4664   const PetscScalar *values;
4665 
4666   PetscFunctionBegin;
4667   PetscCall(MatGetLocalSize(A, &m, NULL));
4668   PetscCall(MatGetSize(A, NULL, &N));
4669   /* Should this be the type of the diagonal block of A? */
4670   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
4671   PetscCall(MatSetSizes(B, m, N, m, N));
4672   PetscCall(MatSetBlockSizesFromMats(B, A, A));
4673   PetscCall(MatSetType(B, MATSEQAIJ));
4674   PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
4675   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
4676   for (i = 0; i < m; i++) {
4677     PetscCall(MatGetRow(A, i + rstart, &nnz, &indx, &values));
4678     PetscCall(MatSetValues(B, 1, &i, nnz, indx, values, INSERT_VALUES));
4679     PetscCall(MatRestoreRow(A, i + rstart, &nnz, &indx, &values));
4680   }
4681   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
4682   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
4683 
4684   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
4685   PetscCall(PetscStrlen(outfile, &len));
4686   PetscCall(PetscMalloc1(len + 6, &name));
4687   PetscCall(PetscSNPrintf(name, len + 6, "%s.%d", outfile, rank));
4688   PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, name, FILE_MODE_APPEND, &out));
4689   PetscCall(PetscFree(name));
4690   PetscCall(MatView(B, out));
4691   PetscCall(PetscViewerDestroy(&out));
4692   PetscCall(MatDestroy(&B));
4693   PetscFunctionReturn(PETSC_SUCCESS);
4694 }
4695 
4696 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4697 {
4698   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4699 
4700   PetscFunctionBegin;
4701   if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
4702   PetscCall(PetscFree(merge->id_r));
4703   PetscCall(PetscFree(merge->len_s));
4704   PetscCall(PetscFree(merge->len_r));
4705   PetscCall(PetscFree(merge->bi));
4706   PetscCall(PetscFree(merge->bj));
4707   PetscCall(PetscFree(merge->buf_ri[0]));
4708   PetscCall(PetscFree(merge->buf_ri));
4709   PetscCall(PetscFree(merge->buf_rj[0]));
4710   PetscCall(PetscFree(merge->buf_rj));
4711   PetscCall(PetscFree(merge->coi));
4712   PetscCall(PetscFree(merge->coj));
4713   PetscCall(PetscFree(merge->owners_co));
4714   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4715   PetscCall(PetscFree(merge));
4716   PetscFunctionReturn(PETSC_SUCCESS);
4717 }
4718 
4719 #include <../src/mat/utils/freespace.h>
4720 #include <petscbt.h>
4721 
4722 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
4723 {
4724   MPI_Comm             comm;
4725   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4726   PetscMPIInt          size, rank, taga, *len_s;
4727   PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj;
4728   PetscInt             proc, m;
4729   PetscInt           **buf_ri, **buf_rj;
4730   PetscInt             k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
4731   PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
4732   MPI_Request         *s_waits, *r_waits;
4733   MPI_Status          *status;
4734   const MatScalar     *aa, *a_a;
4735   MatScalar          **abuf_r, *ba_i;
4736   Mat_Merge_SeqsToMPI *merge;
4737   PetscContainer       container;
4738 
4739   PetscFunctionBegin;
4740   PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
4741   PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));
4742 
4743   PetscCallMPI(MPI_Comm_size(comm, &size));
4744   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4745 
4746   PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
4747   PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4748   PetscCall(PetscContainerGetPointer(container, (void **)&merge));
4749   PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
4750   aa = a_a;
4751 
4752   bi     = merge->bi;
4753   bj     = merge->bj;
4754   buf_ri = merge->buf_ri;
4755   buf_rj = merge->buf_rj;
4756 
4757   PetscCall(PetscMalloc1(size, &status));
4758   owners = merge->rowmap->range;
4759   len_s  = merge->len_s;
4760 
4761   /* send and recv matrix values */
4762   PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
4763   PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));
4764 
4765   PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
4766   for (proc = 0, k = 0; proc < size; proc++) {
4767     if (!len_s[proc]) continue;
4768     i = owners[proc];
4769     PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
4770     k++;
4771   }
4772 
4773   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
4774   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
4775   PetscCall(PetscFree(status));
4776 
4777   PetscCall(PetscFree(s_waits));
4778   PetscCall(PetscFree(r_waits));
4779 
4780   /* insert mat values of mpimat */
4781   PetscCall(PetscMalloc1(N, &ba_i));
4782   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4783 
4784   for (k = 0; k < merge->nrecv; k++) {
4785     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4786     nrows       = *(buf_ri_k[k]);
4787     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4788     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4789   }
4790 
4791   /* set values of ba */
4792   m = merge->rowmap->n;
4793   for (i = 0; i < m; i++) {
4794     arow = owners[rank] + i;
4795     bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
4796     bnzi = bi[i + 1] - bi[i];
4797     PetscCall(PetscArrayzero(ba_i, bnzi));
4798 
4799     /* add local non-zero vals of this proc's seqmat into ba */
4800     anzi   = ai[arow + 1] - ai[arow];
4801     aj     = a->j + ai[arow];
4802     aa     = a_a + ai[arow];
4803     nextaj = 0;
4804     for (j = 0; nextaj < anzi; j++) {
4805       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4806         ba_i[j] += aa[nextaj++];
4807       }
4808     }
4809 
4810     /* add received vals into ba */
4811     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4812       /* i-th row */
4813       if (i == *nextrow[k]) {
4814         anzi   = *(nextai[k] + 1) - *nextai[k];
4815         aj     = buf_rj[k] + *(nextai[k]);
4816         aa     = abuf_r[k] + *(nextai[k]);
4817         nextaj = 0;
4818         for (j = 0; nextaj < anzi; j++) {
4819           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4820             ba_i[j] += aa[nextaj++];
4821           }
4822         }
4823         nextrow[k]++;
4824         nextai[k]++;
4825       }
4826     }
4827     PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
4828   }
4829   PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
4830   PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
4831   PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));
4832 
4833   PetscCall(PetscFree(abuf_r[0]));
4834   PetscCall(PetscFree(abuf_r));
4835   PetscCall(PetscFree(ba_i));
4836   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
4837   PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
4838   PetscFunctionReturn(PETSC_SUCCESS);
4839 }
4840 
4841 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4842 {
4843   Mat                  B_mpi;
4844   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4845   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4846   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4847   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4848   PetscInt             len, proc, *dnz, *onz, bs, cbs;
4849   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4850   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4851   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4852   MPI_Status          *status;
4853   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4854   PetscBT              lnkbt;
4855   Mat_Merge_SeqsToMPI *merge;
4856   PetscContainer       container;
4857 
4858   PetscFunctionBegin;
4859   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4860 
4861   /* make sure it is a PETSc comm */
4862   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4863   PetscCallMPI(MPI_Comm_size(comm, &size));
4864   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4865 
4866   PetscCall(PetscNew(&merge));
4867   PetscCall(PetscMalloc1(size, &status));
4868 
4869   /* determine row ownership */
4870   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4871   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4872   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4873   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4874   PetscCall(PetscLayoutSetUp(merge->rowmap));
4875   PetscCall(PetscMalloc1(size, &len_si));
4876   PetscCall(PetscMalloc1(size, &merge->len_s));
4877 
4878   m      = merge->rowmap->n;
4879   owners = merge->rowmap->range;
4880 
4881   /* determine the number of messages to send, their lengths */
4882   len_s = merge->len_s;
4883 
4884   len          = 0; /* length of buf_si[] */
4885   merge->nsend = 0;
4886   for (proc = 0; proc < size; proc++) {
4887     len_si[proc] = 0;
4888     if (proc == rank) {
4889       len_s[proc] = 0;
4890     } else {
4891       len_si[proc] = owners[proc + 1] - owners[proc] + 1;
4892       len_s[proc]  = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4893     }
4894     if (len_s[proc]) {
4895       merge->nsend++;
4896       nrows = 0;
4897       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4898         if (ai[i + 1] > ai[i]) nrows++;
4899       }
4900       len_si[proc] = 2 * (nrows + 1);
4901       len += len_si[proc];
4902     }
4903   }
4904 
4905   /* determine the number and length of messages to receive for ij-structure */
4906   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4907   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4908 
4909   /* post the Irecv of j-structure */
4910   PetscCall(PetscCommGetNewTag(comm, &tagj));
4911   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4912 
4913   /* post the Isend of j-structure */
4914   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4915 
4916   for (proc = 0, k = 0; proc < size; proc++) {
4917     if (!len_s[proc]) continue;
4918     i = owners[proc];
4919     PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4920     k++;
4921   }
4922 
4923   /* receives and sends of j-structure are complete */
4924   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4925   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4926 
4927   /* send and recv i-structure */
4928   PetscCall(PetscCommGetNewTag(comm, &tagi));
4929   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4930 
4931   PetscCall(PetscMalloc1(len + 1, &buf_s));
4932   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4933   for (proc = 0, k = 0; proc < size; proc++) {
4934     if (!len_s[proc]) continue;
4935     /* form outgoing message for i-structure:
4936          buf_si[0]:                 nrows to be sent
4937                [1:nrows]:           row index (global)
4938                [nrows+1:2*nrows+1]: i-structure index
4939     */
4940     nrows       = len_si[proc] / 2 - 1;
4941     buf_si_i    = buf_si + nrows + 1;
4942     buf_si[0]   = nrows;
4943     buf_si_i[0] = 0;
4944     nrows       = 0;
4945     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4946       anzi = ai[i + 1] - ai[i];
4947       if (anzi) {
4948         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4949         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4950         nrows++;
4951       }
4952     }
4953     PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4954     k++;
4955     buf_si += len_si[proc];
4956   }
4957 
4958   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4959   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4960 
4961   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4962   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4963 
4964   PetscCall(PetscFree(len_si));
4965   PetscCall(PetscFree(len_ri));
4966   PetscCall(PetscFree(rj_waits));
4967   PetscCall(PetscFree2(si_waits, sj_waits));
4968   PetscCall(PetscFree(ri_waits));
4969   PetscCall(PetscFree(buf_s));
4970   PetscCall(PetscFree(status));
4971 
4972   /* compute a local seq matrix in each processor */
4973   /* allocate bi array and free space for accumulating nonzero column info */
4974   PetscCall(PetscMalloc1(m + 1, &bi));
4975   bi[0] = 0;
4976 
4977   /* create and initialize a linked list */
4978   nlnk = N + 1;
4979   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
4980 
4981   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4982   len = ai[owners[rank + 1]] - ai[owners[rank]];
4983   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
4984 
4985   current_space = free_space;
4986 
4987   /* determine symbolic info for each local row */
4988   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4989 
4990   for (k = 0; k < merge->nrecv; k++) {
4991     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4992     nrows       = *buf_ri_k[k];
4993     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4994     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4995   }
4996 
4997   MatPreallocateBegin(comm, m, n, dnz, onz);
4998   len = 0;
4999   for (i = 0; i < m; i++) {
5000     bnzi = 0;
5001     /* add local non-zero cols of this proc's seqmat into lnk */
5002     arow = owners[rank] + i;
5003     anzi = ai[arow + 1] - ai[arow];
5004     aj   = a->j + ai[arow];
5005     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5006     bnzi += nlnk;
5007     /* add received col data into lnk */
5008     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
5009       if (i == *nextrow[k]) {            /* i-th row */
5010         anzi = *(nextai[k] + 1) - *nextai[k];
5011         aj   = buf_rj[k] + *nextai[k];
5012         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5013         bnzi += nlnk;
5014         nextrow[k]++;
5015         nextai[k]++;
5016       }
5017     }
5018     if (len < bnzi) len = bnzi; /* =max(bnzi) */
5019 
5020     /* if free space is not available, make more free space */
5021     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
5022     /* copy data into free space, then initialize lnk */
5023     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
5024     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
5025 
5026     current_space->array += bnzi;
5027     current_space->local_used += bnzi;
5028     current_space->local_remaining -= bnzi;
5029 
5030     bi[i + 1] = bi[i] + bnzi;
5031   }
5032 
5033   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
5034 
5035   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
5036   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
5037   PetscCall(PetscLLDestroy(lnk, lnkbt));
5038 
5039   /* create symbolic parallel matrix B_mpi */
5040   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
5041   PetscCall(MatCreate(comm, &B_mpi));
5042   if (n == PETSC_DECIDE) {
5043     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
5044   } else {
5045     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
5046   }
5047   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
5048   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
5049   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5050   MatPreallocateEnd(dnz, onz);
5051   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
5052 
5053   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5054   B_mpi->assembled = PETSC_FALSE;
5055   merge->bi        = bi;
5056   merge->bj        = bj;
5057   merge->buf_ri    = buf_ri;
5058   merge->buf_rj    = buf_rj;
5059   merge->coi       = NULL;
5060   merge->coj       = NULL;
5061   merge->owners_co = NULL;
5062 
5063   PetscCall(PetscCommDestroy(&comm));
5064 
5065   /* attach the supporting struct to B_mpi for reuse */
5066   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5067   PetscCall(PetscContainerSetPointer(container, merge));
5068   PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5069   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5070   PetscCall(PetscContainerDestroy(&container));
5071   *mpimat = B_mpi;
5072 
5073   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5074   PetscFunctionReturn(PETSC_SUCCESS);
5075 }
5076 
5077 /*@C
5078   MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5079   matrices from each processor
5080 
5081   Collective
5082 
5083   Input Parameters:
5084 + comm   - the communicators the parallel matrix will live on
5085 . seqmat - the input sequential matrices
5086 . m      - number of local rows (or `PETSC_DECIDE`)
5087 . n      - number of local columns (or `PETSC_DECIDE`)
5088 - scall  - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5089 
5090   Output Parameter:
5091 . mpimat - the parallel matrix generated
5092 
5093   Level: advanced
5094 
5095   Note:
5096   The dimensions of the sequential matrix in each processor MUST be the same.
5097   The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5098   destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat.
5099 
5100 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
5101 @*/
5102 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5103 {
5104   PetscMPIInt size;
5105 
5106   PetscFunctionBegin;
5107   PetscCallMPI(MPI_Comm_size(comm, &size));
5108   if (size == 1) {
5109     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5110     if (scall == MAT_INITIAL_MATRIX) {
5111       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5112     } else {
5113       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5114     }
5115     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5116     PetscFunctionReturn(PETSC_SUCCESS);
5117   }
5118   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5119   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5120   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5121   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5122   PetscFunctionReturn(PETSC_SUCCESS);
5123 }
5124 
5125 /*@
5126   MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix by taking its local rows and putting them into a sequential matrix with
5127   mlocal rows and n columns. Where mlocal is obtained with `MatGetLocalSize()` and n is the global column count obtained
5128   with `MatGetSize()`
5129 
5130   Not Collective
5131 
5132   Input Parameter:
5133 . A - the matrix
5134 
5135   Output Parameter:
5136 . A_loc - the local sequential matrix generated
5137 
5138   Level: developer
5139 
5140   Notes:
5141   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5142 
5143   Destroy the matrix with `MatDestroy()`
5144 
5145 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5146 @*/
5147 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5148 {
5149   PetscBool mpi;
5150 
5151   PetscFunctionBegin;
5152   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5153   if (mpi) {
5154     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5155   } else {
5156     *A_loc = A;
5157     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5158   }
5159   PetscFunctionReturn(PETSC_SUCCESS);
5160 }
5161 
5162 /*@
5163   MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5164   mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained
5165   with `MatGetSize()`
5166 
5167   Not Collective
5168 
5169   Input Parameters:
5170 + A     - the matrix
5171 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5172 
5173   Output Parameter:
5174 . A_loc - the local sequential matrix generated
5175 
5176   Level: developer
5177 
5178   Notes:
5179   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5180 
5181   When the communicator associated with `A` has size 1 and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A`.
5182   If `MAT_REUSE_MATRIX` is requested with comm size 1, `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called.
5183   This means that one can preallocate the proper sequential matrix first and then call this routine with `MAT_REUSE_MATRIX` to safely
5184   modify the values of the returned `A_loc`.
5185 
5186 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5187 @*/
5188 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
5189 {
5190   Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
5191   Mat_SeqAIJ        *mat, *a, *b;
5192   PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
5193   const PetscScalar *aa, *ba, *aav, *bav;
5194   PetscScalar       *ca, *cam;
5195   PetscMPIInt        size;
5196   PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
5197   PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
5198   PetscBool          match;
5199 
5200   PetscFunctionBegin;
5201   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
5202   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5203   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5204   if (size == 1) {
5205     if (scall == MAT_INITIAL_MATRIX) {
5206       PetscCall(PetscObjectReference((PetscObject)mpimat->A));
5207       *A_loc = mpimat->A;
5208     } else if (scall == MAT_REUSE_MATRIX) {
5209       PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
5210     }
5211     PetscFunctionReturn(PETSC_SUCCESS);
5212   }
5213 
5214   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5215   a  = (Mat_SeqAIJ *)(mpimat->A)->data;
5216   b  = (Mat_SeqAIJ *)(mpimat->B)->data;
5217   ai = a->i;
5218   aj = a->j;
5219   bi = b->i;
5220   bj = b->j;
5221   PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
5222   PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
5223   aa = aav;
5224   ba = bav;
5225   if (scall == MAT_INITIAL_MATRIX) {
5226     PetscCall(PetscMalloc1(1 + am, &ci));
5227     ci[0] = 0;
5228     for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
5229     PetscCall(PetscMalloc1(1 + ci[am], &cj));
5230     PetscCall(PetscMalloc1(1 + ci[am], &ca));
5231     k = 0;
5232     for (i = 0; i < am; i++) {
5233       ncols_o = bi[i + 1] - bi[i];
5234       ncols_d = ai[i + 1] - ai[i];
5235       /* off-diagonal portion of A */
5236       for (jo = 0; jo < ncols_o; jo++) {
5237         col = cmap[*bj];
5238         if (col >= cstart) break;
5239         cj[k] = col;
5240         bj++;
5241         ca[k++] = *ba++;
5242       }
5243       /* diagonal portion of A */
5244       for (j = 0; j < ncols_d; j++) {
5245         cj[k]   = cstart + *aj++;
5246         ca[k++] = *aa++;
5247       }
5248       /* off-diagonal portion of A */
5249       for (j = jo; j < ncols_o; j++) {
5250         cj[k]   = cmap[*bj++];
5251         ca[k++] = *ba++;
5252       }
5253     }
5254     /* put together the new matrix */
5255     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
5256     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5257     /* Since these are PETSc arrays, change flags to free them as necessary. */
5258     mat          = (Mat_SeqAIJ *)(*A_loc)->data;
5259     mat->free_a  = PETSC_TRUE;
5260     mat->free_ij = PETSC_TRUE;
5261     mat->nonew   = 0;
5262   } else if (scall == MAT_REUSE_MATRIX) {
5263     mat = (Mat_SeqAIJ *)(*A_loc)->data;
5264     ci  = mat->i;
5265     cj  = mat->j;
5266     PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
5267     for (i = 0; i < am; i++) {
5268       /* off-diagonal portion of A */
5269       ncols_o = bi[i + 1] - bi[i];
5270       for (jo = 0; jo < ncols_o; jo++) {
5271         col = cmap[*bj];
5272         if (col >= cstart) break;
5273         *cam++ = *ba++;
5274         bj++;
5275       }
5276       /* diagonal portion of A */
5277       ncols_d = ai[i + 1] - ai[i];
5278       for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
5279       /* off-diagonal portion of A */
5280       for (j = jo; j < ncols_o; j++) {
5281         *cam++ = *ba++;
5282         bj++;
5283       }
5284     }
5285     PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
5286   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5287   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
5288   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
5289   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5290   PetscFunctionReturn(PETSC_SUCCESS);
5291 }
5292 
5293 /*@
5294   MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5295   mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part
5296 
5297   Not Collective
5298 
5299   Input Parameters:
5300 + A     - the matrix
5301 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5302 
5303   Output Parameters:
5304 + glob  - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5305 - A_loc - the local sequential matrix generated
5306 
5307   Level: developer
5308 
5309   Note:
5310   This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal
5311   part, then those associated with the off diagonal part (in its local ordering)
5312 
5313 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5314 @*/
5315 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5316 {
5317   Mat             Ao, Ad;
5318   const PetscInt *cmap;
5319   PetscMPIInt     size;
5320   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5321 
5322   PetscFunctionBegin;
5323   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5324   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5325   if (size == 1) {
5326     if (scall == MAT_INITIAL_MATRIX) {
5327       PetscCall(PetscObjectReference((PetscObject)Ad));
5328       *A_loc = Ad;
5329     } else if (scall == MAT_REUSE_MATRIX) {
5330       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5331     }
5332     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5333     PetscFunctionReturn(PETSC_SUCCESS);
5334   }
5335   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5336   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5337   if (f) {
5338     PetscCall((*f)(A, scall, glob, A_loc));
5339   } else {
5340     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5341     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5342     Mat_SeqAIJ        *c;
5343     PetscInt          *ai = a->i, *aj = a->j;
5344     PetscInt          *bi = b->i, *bj = b->j;
5345     PetscInt          *ci, *cj;
5346     const PetscScalar *aa, *ba;
5347     PetscScalar       *ca;
5348     PetscInt           i, j, am, dn, on;
5349 
5350     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5351     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5352     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5353     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5354     if (scall == MAT_INITIAL_MATRIX) {
5355       PetscInt k;
5356       PetscCall(PetscMalloc1(1 + am, &ci));
5357       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5358       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5359       ci[0] = 0;
5360       for (i = 0, k = 0; i < am; i++) {
5361         const PetscInt ncols_o = bi[i + 1] - bi[i];
5362         const PetscInt ncols_d = ai[i + 1] - ai[i];
5363         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5364         /* diagonal portion of A */
5365         for (j = 0; j < ncols_d; j++, k++) {
5366           cj[k] = *aj++;
5367           ca[k] = *aa++;
5368         }
5369         /* off-diagonal portion of A */
5370         for (j = 0; j < ncols_o; j++, k++) {
5371           cj[k] = dn + *bj++;
5372           ca[k] = *ba++;
5373         }
5374       }
5375       /* put together the new matrix */
5376       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5377       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5378       /* Since these are PETSc arrays, change flags to free them as necessary. */
5379       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5380       c->free_a  = PETSC_TRUE;
5381       c->free_ij = PETSC_TRUE;
5382       c->nonew   = 0;
5383       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5384     } else if (scall == MAT_REUSE_MATRIX) {
5385       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5386       for (i = 0; i < am; i++) {
5387         const PetscInt ncols_d = ai[i + 1] - ai[i];
5388         const PetscInt ncols_o = bi[i + 1] - bi[i];
5389         /* diagonal portion of A */
5390         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5391         /* off-diagonal portion of A */
5392         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5393       }
5394       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5395     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5396     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5397     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa));
5398     if (glob) {
5399       PetscInt cst, *gidx;
5400 
5401       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5402       PetscCall(PetscMalloc1(dn + on, &gidx));
5403       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5404       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5405       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5406     }
5407   }
5408   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5409   PetscFunctionReturn(PETSC_SUCCESS);
5410 }
5411 
5412 /*@C
5413   MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5414 
5415   Not Collective
5416 
5417   Input Parameters:
5418 + A     - the matrix
5419 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5420 . row   - index set of rows to extract (or `NULL`)
5421 - col   - index set of columns to extract (or `NULL`)
5422 
5423   Output Parameter:
5424 . A_loc - the local sequential matrix generated
5425 
5426   Level: developer
5427 
5428 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5429 @*/
5430 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
5431 {
5432   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5433   PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
5434   IS          isrowa, iscola;
5435   Mat        *aloc;
5436   PetscBool   match;
5437 
5438   PetscFunctionBegin;
5439   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
5440   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5441   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5442   if (!row) {
5443     start = A->rmap->rstart;
5444     end   = A->rmap->rend;
5445     PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
5446   } else {
5447     isrowa = *row;
5448   }
5449   if (!col) {
5450     start = A->cmap->rstart;
5451     cmap  = a->garray;
5452     nzA   = a->A->cmap->n;
5453     nzB   = a->B->cmap->n;
5454     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5455     ncols = 0;
5456     for (i = 0; i < nzB; i++) {
5457       if (cmap[i] < start) idx[ncols++] = cmap[i];
5458       else break;
5459     }
5460     imark = i;
5461     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
5462     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
5463     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
5464   } else {
5465     iscola = *col;
5466   }
5467   if (scall != MAT_INITIAL_MATRIX) {
5468     PetscCall(PetscMalloc1(1, &aloc));
5469     aloc[0] = *A_loc;
5470   }
5471   PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
5472   if (!col) { /* attach global id of condensed columns */
5473     PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
5474   }
5475   *A_loc = aloc[0];
5476   PetscCall(PetscFree(aloc));
5477   if (!row) PetscCall(ISDestroy(&isrowa));
5478   if (!col) PetscCall(ISDestroy(&iscola));
5479   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5480   PetscFunctionReturn(PETSC_SUCCESS);
5481 }
5482 
5483 /*
5484  * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched.
5485  * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based
5486  * on a global size.
5487  * */
5488 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
5489 {
5490   Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
5491   Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth;
5492   PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
5493   PetscMPIInt            owner;
5494   PetscSFNode           *iremote, *oiremote;
5495   const PetscInt        *lrowindices;
5496   PetscSF                sf, osf;
5497   PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
5498   PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
5499   MPI_Comm               comm;
5500   ISLocalToGlobalMapping mapping;
5501   const PetscScalar     *pd_a, *po_a;
5502 
5503   PetscFunctionBegin;
5504   PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
5505   /* plocalsize is the number of roots
5506    * nrows is the number of leaves
5507    * */
5508   PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
5509   PetscCall(ISGetLocalSize(rows, &nrows));
5510   PetscCall(PetscCalloc1(nrows, &iremote));
5511   PetscCall(ISGetIndices(rows, &lrowindices));
5512   for (i = 0; i < nrows; i++) {
5513     /* Find a remote index and an owner for a row
5514      * The row could be local or remote
5515      * */
5516     owner = 0;
5517     lidx  = 0;
5518     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
5519     iremote[i].index = lidx;
5520     iremote[i].rank  = owner;
5521   }
5522   /* Create SF to communicate how many nonzero columns for each row */
5523   PetscCall(PetscSFCreate(comm, &sf));
5524   /* SF will figure out the number of nonzero colunms for each row, and their
5525    * offsets
5526    * */
5527   PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5528   PetscCall(PetscSFSetFromOptions(sf));
5529   PetscCall(PetscSFSetUp(sf));
5530 
5531   PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
5532   PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
5533   PetscCall(PetscCalloc1(nrows, &pnnz));
5534   roffsets[0] = 0;
5535   roffsets[1] = 0;
5536   for (i = 0; i < plocalsize; i++) {
5537     /* diag */
5538     nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
5539     /* off diag */
5540     nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
5541     /* compute offsets so that we relative location for each row */
5542     roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
5543     roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
5544   }
5545   PetscCall(PetscCalloc1(2 * nrows, &nlcols));
5546   PetscCall(PetscCalloc1(2 * nrows, &loffsets));
5547   /* 'r' means root, and 'l' means leaf */
5548   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5549   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5550   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5551   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5552   PetscCall(PetscSFDestroy(&sf));
5553   PetscCall(PetscFree(roffsets));
5554   PetscCall(PetscFree(nrcols));
5555   dntotalcols = 0;
5556   ontotalcols = 0;
5557   ncol        = 0;
5558   for (i = 0; i < nrows; i++) {
5559     pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
5560     ncol    = PetscMax(pnnz[i], ncol);
5561     /* diag */
5562     dntotalcols += nlcols[i * 2 + 0];
5563     /* off diag */
5564     ontotalcols += nlcols[i * 2 + 1];
5565   }
5566   /* We do not need to figure the right number of columns
5567    * since all the calculations will be done by going through the raw data
5568    * */
5569   PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
5570   PetscCall(MatSetUp(*P_oth));
5571   PetscCall(PetscFree(pnnz));
5572   p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5573   /* diag */
5574   PetscCall(PetscCalloc1(dntotalcols, &iremote));
5575   /* off diag */
5576   PetscCall(PetscCalloc1(ontotalcols, &oiremote));
5577   /* diag */
5578   PetscCall(PetscCalloc1(dntotalcols, &ilocal));
5579   /* off diag */
5580   PetscCall(PetscCalloc1(ontotalcols, &oilocal));
5581   dntotalcols = 0;
5582   ontotalcols = 0;
5583   ntotalcols  = 0;
5584   for (i = 0; i < nrows; i++) {
5585     owner = 0;
5586     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
5587     /* Set iremote for diag matrix */
5588     for (j = 0; j < nlcols[i * 2 + 0]; j++) {
5589       iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
5590       iremote[dntotalcols].rank  = owner;
5591       /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
5592       ilocal[dntotalcols++] = ntotalcols++;
5593     }
5594     /* off diag */
5595     for (j = 0; j < nlcols[i * 2 + 1]; j++) {
5596       oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
5597       oiremote[ontotalcols].rank  = owner;
5598       oilocal[ontotalcols++]      = ntotalcols++;
5599     }
5600   }
5601   PetscCall(ISRestoreIndices(rows, &lrowindices));
5602   PetscCall(PetscFree(loffsets));
5603   PetscCall(PetscFree(nlcols));
5604   PetscCall(PetscSFCreate(comm, &sf));
5605   /* P serves as roots and P_oth is leaves
5606    * Diag matrix
5607    * */
5608   PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5609   PetscCall(PetscSFSetFromOptions(sf));
5610   PetscCall(PetscSFSetUp(sf));
5611 
5612   PetscCall(PetscSFCreate(comm, &osf));
5613   /* Off diag */
5614   PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
5615   PetscCall(PetscSFSetFromOptions(osf));
5616   PetscCall(PetscSFSetUp(osf));
5617   PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5618   PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5619   /* We operate on the matrix internal data for saving memory */
5620   PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5621   PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5622   PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
5623   /* Convert to global indices for diag matrix */
5624   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
5625   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5626   /* We want P_oth store global indices */
5627   PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
5628   /* Use memory scalable approach */
5629   PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
5630   PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
5631   PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5632   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5633   /* Convert back to local indices */
5634   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
5635   PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5636   nout = 0;
5637   PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
5638   PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
5639   PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
5640   /* Exchange values */
5641   PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5642   PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5643   PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5644   PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5645   /* Stop PETSc from shrinking memory */
5646   for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
5647   PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
5648   PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
5649   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5650   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
5651   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
5652   PetscCall(PetscSFDestroy(&sf));
5653   PetscCall(PetscSFDestroy(&osf));
5654   PetscFunctionReturn(PETSC_SUCCESS);
5655 }
5656 
5657 /*
5658  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5659  * This supports MPIAIJ and MAIJ
5660  * */
5661 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
5662 {
5663   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
5664   Mat_SeqAIJ *p_oth;
5665   IS          rows, map;
5666   PetscHMapI  hamp;
5667   PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
5668   MPI_Comm    comm;
5669   PetscSF     sf, osf;
5670   PetscBool   has;
5671 
5672   PetscFunctionBegin;
5673   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5674   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
5675   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5676    *  and then create a submatrix (that often is an overlapping matrix)
5677    * */
5678   if (reuse == MAT_INITIAL_MATRIX) {
5679     /* Use a hash table to figure out unique keys */
5680     PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
5681     PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
5682     count = 0;
5683     /* Assume that  a->g is sorted, otherwise the following does not make sense */
5684     for (i = 0; i < a->B->cmap->n; i++) {
5685       key = a->garray[i] / dof;
5686       PetscCall(PetscHMapIHas(hamp, key, &has));
5687       if (!has) {
5688         mapping[i] = count;
5689         PetscCall(PetscHMapISet(hamp, key, count++));
5690       } else {
5691         /* Current 'i' has the same value the previous step */
5692         mapping[i] = count - 1;
5693       }
5694     }
5695     PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
5696     PetscCall(PetscHMapIGetSize(hamp, &htsize));
5697     PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
5698     PetscCall(PetscCalloc1(htsize, &rowindices));
5699     off = 0;
5700     PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
5701     PetscCall(PetscHMapIDestroy(&hamp));
5702     PetscCall(PetscSortInt(htsize, rowindices));
5703     PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
5704     /* In case, the matrix was already created but users want to recreate the matrix */
5705     PetscCall(MatDestroy(P_oth));
5706     PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
5707     PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
5708     PetscCall(ISDestroy(&map));
5709     PetscCall(ISDestroy(&rows));
5710   } else if (reuse == MAT_REUSE_MATRIX) {
5711     /* If matrix was already created, we simply update values using SF objects
5712      * that as attached to the matrix earlier.
5713      */
5714     const PetscScalar *pd_a, *po_a;
5715 
5716     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
5717     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
5718     PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
5719     p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5720     /* Update values in place */
5721     PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5722     PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5723     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5724     PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5725     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5726     PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5727     PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5728     PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5729   } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
5730   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
5731   PetscFunctionReturn(PETSC_SUCCESS);
5732 }
5733 
5734 /*@C
5735   MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A`
5736 
5737   Collective
5738 
5739   Input Parameters:
5740 + A     - the first matrix in `MATMPIAIJ` format
5741 . B     - the second matrix in `MATMPIAIJ` format
5742 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5743 
5744   Output Parameters:
5745 + rowb  - On input index sets of rows of B to extract (or `NULL`), modified on output
5746 . colb  - On input index sets of columns of B to extract (or `NULL`), modified on output
5747 - B_seq - the sequential matrix generated
5748 
5749   Level: developer
5750 
5751 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
5752 @*/
5753 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5754 {
5755   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5756   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5757   IS          isrowb, iscolb;
5758   Mat        *bseq = NULL;
5759 
5760   PetscFunctionBegin;
5761   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5762              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5763   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5764 
5765   if (scall == MAT_INITIAL_MATRIX) {
5766     start = A->cmap->rstart;
5767     cmap  = a->garray;
5768     nzA   = a->A->cmap->n;
5769     nzB   = a->B->cmap->n;
5770     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5771     ncols = 0;
5772     for (i = 0; i < nzB; i++) { /* row < local row index */
5773       if (cmap[i] < start) idx[ncols++] = cmap[i];
5774       else break;
5775     }
5776     imark = i;
5777     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5778     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5779     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5780     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5781   } else {
5782     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5783     isrowb = *rowb;
5784     iscolb = *colb;
5785     PetscCall(PetscMalloc1(1, &bseq));
5786     bseq[0] = *B_seq;
5787   }
5788   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5789   *B_seq = bseq[0];
5790   PetscCall(PetscFree(bseq));
5791   if (!rowb) {
5792     PetscCall(ISDestroy(&isrowb));
5793   } else {
5794     *rowb = isrowb;
5795   }
5796   if (!colb) {
5797     PetscCall(ISDestroy(&iscolb));
5798   } else {
5799     *colb = iscolb;
5800   }
5801   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5802   PetscFunctionReturn(PETSC_SUCCESS);
5803 }
5804 
5805 /*
5806     MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns
5807     of the OFF-DIAGONAL portion of local A
5808 
5809     Collective
5810 
5811    Input Parameters:
5812 +    A,B - the matrices in `MATMPIAIJ` format
5813 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5814 
5815    Output Parameter:
5816 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5817 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5818 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5819 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5820 
5821     Developer Note:
5822     This directly accesses information inside the VecScatter associated with the matrix-vector product
5823      for this matrix. This is not desirable..
5824 
5825     Level: developer
5826 
5827 */
5828 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5829 {
5830   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5831   Mat_SeqAIJ        *b_oth;
5832   VecScatter         ctx;
5833   MPI_Comm           comm;
5834   const PetscMPIInt *rprocs, *sprocs;
5835   const PetscInt    *srow, *rstarts, *sstarts;
5836   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5837   PetscInt           i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len;
5838   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5839   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5840   PetscMPIInt        size, tag, rank, nreqs;
5841 
5842   PetscFunctionBegin;
5843   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5844   PetscCallMPI(MPI_Comm_size(comm, &size));
5845 
5846   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5847              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5848   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5849   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5850 
5851   if (size == 1) {
5852     startsj_s = NULL;
5853     bufa_ptr  = NULL;
5854     *B_oth    = NULL;
5855     PetscFunctionReturn(PETSC_SUCCESS);
5856   }
5857 
5858   ctx = a->Mvctx;
5859   tag = ((PetscObject)ctx)->tag;
5860 
5861   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5862   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5863   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5864   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5865   PetscCall(PetscMalloc1(nreqs, &reqs));
5866   rwaits = reqs;
5867   swaits = reqs + nrecvs;
5868 
5869   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5870   if (scall == MAT_INITIAL_MATRIX) {
5871     /* i-array */
5872     /*  post receives */
5873     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5874     for (i = 0; i < nrecvs; i++) {
5875       rowlen = rvalues + rstarts[i] * rbs;
5876       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5877       PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5878     }
5879 
5880     /* pack the outgoing message */
5881     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5882 
5883     sstartsj[0] = 0;
5884     rstartsj[0] = 0;
5885     len         = 0; /* total length of j or a array to be sent */
5886     if (nsends) {
5887       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5888       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5889     }
5890     for (i = 0; i < nsends; i++) {
5891       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5892       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5893       for (j = 0; j < nrows; j++) {
5894         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5895         for (l = 0; l < sbs; l++) {
5896           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5897 
5898           rowlen[j * sbs + l] = ncols;
5899 
5900           len += ncols;
5901           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5902         }
5903         k++;
5904       }
5905       PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5906 
5907       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5908     }
5909     /* recvs and sends of i-array are completed */
5910     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5911     PetscCall(PetscFree(svalues));
5912 
5913     /* allocate buffers for sending j and a arrays */
5914     PetscCall(PetscMalloc1(len + 1, &bufj));
5915     PetscCall(PetscMalloc1(len + 1, &bufa));
5916 
5917     /* create i-array of B_oth */
5918     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5919 
5920     b_othi[0] = 0;
5921     len       = 0; /* total length of j or a array to be received */
5922     k         = 0;
5923     for (i = 0; i < nrecvs; i++) {
5924       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5925       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5926       for (j = 0; j < nrows; j++) {
5927         b_othi[k + 1] = b_othi[k] + rowlen[j];
5928         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5929         k++;
5930       }
5931       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5932     }
5933     PetscCall(PetscFree(rvalues));
5934 
5935     /* allocate space for j and a arrays of B_oth */
5936     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5937     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5938 
5939     /* j-array */
5940     /*  post receives of j-array */
5941     for (i = 0; i < nrecvs; i++) {
5942       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5943       PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5944     }
5945 
5946     /* pack the outgoing message j-array */
5947     if (nsends) k = sstarts[0];
5948     for (i = 0; i < nsends; i++) {
5949       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5950       bufJ  = bufj + sstartsj[i];
5951       for (j = 0; j < nrows; j++) {
5952         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5953         for (ll = 0; ll < sbs; ll++) {
5954           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5955           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5956           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5957         }
5958       }
5959       PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5960     }
5961 
5962     /* recvs and sends of j-array are completed */
5963     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5964   } else if (scall == MAT_REUSE_MATRIX) {
5965     sstartsj = *startsj_s;
5966     rstartsj = *startsj_r;
5967     bufa     = *bufa_ptr;
5968     b_oth    = (Mat_SeqAIJ *)(*B_oth)->data;
5969     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5970   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5971 
5972   /* a-array */
5973   /*  post receives of a-array */
5974   for (i = 0; i < nrecvs; i++) {
5975     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5976     PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
5977   }
5978 
5979   /* pack the outgoing message a-array */
5980   if (nsends) k = sstarts[0];
5981   for (i = 0; i < nsends; i++) {
5982     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5983     bufA  = bufa + sstartsj[i];
5984     for (j = 0; j < nrows; j++) {
5985       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5986       for (ll = 0; ll < sbs; ll++) {
5987         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5988         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
5989         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5990       }
5991     }
5992     PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
5993   }
5994   /* recvs and sends of a-array are completed */
5995   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5996   PetscCall(PetscFree(reqs));
5997 
5998   if (scall == MAT_INITIAL_MATRIX) {
5999     /* put together the new matrix */
6000     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
6001 
6002     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
6003     /* Since these are PETSc arrays, change flags to free them as necessary. */
6004     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
6005     b_oth->free_a  = PETSC_TRUE;
6006     b_oth->free_ij = PETSC_TRUE;
6007     b_oth->nonew   = 0;
6008 
6009     PetscCall(PetscFree(bufj));
6010     if (!startsj_s || !bufa_ptr) {
6011       PetscCall(PetscFree2(sstartsj, rstartsj));
6012       PetscCall(PetscFree(bufa_ptr));
6013     } else {
6014       *startsj_s = sstartsj;
6015       *startsj_r = rstartsj;
6016       *bufa_ptr  = bufa;
6017     }
6018   } else if (scall == MAT_REUSE_MATRIX) {
6019     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
6020   }
6021 
6022   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
6023   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
6024   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
6025   PetscFunctionReturn(PETSC_SUCCESS);
6026 }
6027 
/*
   Forward declarations of conversion and product-setup routines. Their definitions are not in this
   part of the file. Several are only declared when the matching PETSC_HAVE_* configuration macro
   is defined.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
#if defined(PETSC_HAVE_MKL_SPARSE)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_CUDA)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_HIP)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6057 
6058 /*
6059     Computes (B'*A')' since computing B*A directly is untenable
6060 
6061                n                       p                          p
6062         [             ]       [             ]         [                 ]
6063       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6064         [             ]       [             ]         [                 ]
6065 
6066 */
6067 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6068 {
6069   Mat At, Bt, Ct;
6070 
6071   PetscFunctionBegin;
6072   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6073   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6074   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
6075   PetscCall(MatDestroy(&At));
6076   PetscCall(MatDestroy(&Bt));
6077   PetscCall(MatTransposeSetPrecursor(Ct, C));
6078   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6079   PetscCall(MatDestroy(&Ct));
6080   PetscFunctionReturn(PETSC_SUCCESS);
6081 }
6082 
6083 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6084 {
6085   PetscBool cisdense;
6086 
6087   PetscFunctionBegin;
6088   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6089   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6090   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6091   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6092   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6093   PetscCall(MatSetUp(C));
6094 
6095   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6096   PetscFunctionReturn(PETSC_SUCCESS);
6097 }
6098 
6099 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6100 {
6101   Mat_Product *product = C->product;
6102   Mat          A = product->A, B = product->B;
6103 
6104   PetscFunctionBegin;
6105   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
6106              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6107   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6108   C->ops->productsymbolic = MatProductSymbolic_AB;
6109   PetscFunctionReturn(PETSC_SUCCESS);
6110 }
6111 
6112 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6113 {
6114   Mat_Product *product = C->product;
6115 
6116   PetscFunctionBegin;
6117   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6118   PetscFunctionReturn(PETSC_SUCCESS);
6119 }
6120 
6121 /*
6122    Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6123 
6124   Input Parameters:
6125 
6126     j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
6127     j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)
6128 
6129     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6130 
6131     For Set1, j1[] contains column indices of the nonzeros.
6132     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
6133     respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted,
6134     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6135 
6136     Similar for Set2.
6137 
6138     This routine merges the two sets of nonzeros row by row and removes repeats.
6139 
6140   Output Parameters: (memory is allocated by the caller)
6141 
6142     i[],j[]: the CSR of the merged matrix, which has m rows.
6143     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6144     imap2[]: similar to imap1[], but for Set2.
6145     Note we order nonzeros row-by-row and from left to right.
6146 */
6147 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
6148 {
6149   PetscInt   r, m; /* Row index of mat */
6150   PetscCount t, t1, t2, b1, e1, b2, e2;
6151 
6152   PetscFunctionBegin;
6153   PetscCall(MatGetLocalSize(mat, &m, NULL));
6154   t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */
6155   i[0]        = 0;
6156   for (r = 0; r < m; r++) { /* Do row by row merging */
6157     b1 = rowBegin1[r];
6158     e1 = rowEnd1[r];
6159     b2 = rowBegin2[r];
6160     e2 = rowEnd2[r];
6161     while (b1 < e1 && b2 < e2) {
6162       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6163         j[t]      = j1[b1];
6164         imap1[t1] = t;
6165         imap2[t2] = t;
6166         b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
6167         b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
6168         t1++;
6169         t2++;
6170         t++;
6171       } else if (j1[b1] < j2[b2]) {
6172         j[t]      = j1[b1];
6173         imap1[t1] = t;
6174         b1 += jmap1[t1 + 1] - jmap1[t1];
6175         t1++;
6176         t++;
6177       } else {
6178         j[t]      = j2[b2];
6179         imap2[t2] = t;
6180         b2 += jmap2[t2 + 1] - jmap2[t2];
6181         t2++;
6182         t++;
6183       }
6184     }
6185     /* Merge the remaining in either j1[] or j2[] */
6186     while (b1 < e1) {
6187       j[t]      = j1[b1];
6188       imap1[t1] = t;
6189       b1 += jmap1[t1 + 1] - jmap1[t1];
6190       t1++;
6191       t++;
6192     }
6193     while (b2 < e2) {
6194       j[t]      = j2[b2];
6195       imap2[t2] = t;
6196       b2 += jmap2[t2 + 1] - jmap2[t2];
6197       t2++;
6198       t++;
6199     }
6200     i[r + 1] = t;
6201   }
6202   PetscFunctionReturn(PETSC_SUCCESS);
6203 }
6204 
6205 /*
6206   Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6207 
6208   Input Parameters:
6209     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6210     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6211       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6212 
6213       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6214       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6215 
6216   Output Parameters:
6217     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6218     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6219       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6220       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6221 
6222     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6223       Atot: number of entries belonging to the diagonal block.
6224       Annz: number of unique nonzeros belonging to the diagonal block.
6225       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6226         repeats (i.e., same 'i,j' pair).
6227       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6228         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6229 
6230       Atot: number of entries belonging to the diagonal block
6231       Annz: number of unique nonzeros belonging to the diagonal block.
6232 
6233     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6234 
6235     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6236 */
6237 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
6238 {
6239   PetscInt    cstart, cend, rstart, rend, row, col;
6240   PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6241   PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6242   PetscCount  k, m, p, q, r, s, mid;
6243   PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;
6244 
6245   PetscFunctionBegin;
6246   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6247   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6248   m = rend - rstart;
6249 
6250   /* Skip negative rows */
6251   for (k = 0; k < n; k++)
6252     if (i[k] >= 0) break;
6253 
6254   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6255      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6256   */
6257   while (k < n) {
6258     row = i[k];
6259     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6260     for (s = k; s < n; s++)
6261       if (i[s] != row) break;
6262 
6263     /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */
6264     for (p = k; p < s; p++) {
6265       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT;
6266       else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
6267     }
6268     PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
6269     PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6270     rowBegin[row - rstart] = k;
6271     rowMid[row - rstart]   = mid;
6272     rowEnd[row - rstart]   = s;
6273 
6274     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6275     Atot += mid - k;
6276     Btot += s - mid;
6277 
6278     /* Count unique nonzeros of this diag row */
6279     for (p = k; p < mid;) {
6280       col = j[p];
6281       do {
6282         j[p] += PETSC_MAX_INT; /* Revert the modified diagonal indices */
6283         p++;
6284       } while (p < mid && j[p] == col);
6285       Annz++;
6286     }
6287 
6288     /* Count unique nonzeros of this offdiag row */
6289     for (p = mid; p < s;) {
6290       col = j[p];
6291       do {
6292         p++;
6293       } while (p < s && j[p] == col);
6294       Bnnz++;
6295     }
6296     k = s;
6297   }
6298 
6299   /* Allocation according to Atot, Btot, Annz, Bnnz */
6300   PetscCall(PetscMalloc1(Atot, &Aperm));
6301   PetscCall(PetscMalloc1(Btot, &Bperm));
6302   PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
6303   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));
6304 
6305   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6306   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6307   for (r = 0; r < m; r++) {
6308     k   = rowBegin[r];
6309     mid = rowMid[r];
6310     s   = rowEnd[r];
6311     PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k));
6312     PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid));
6313     Atot += mid - k;
6314     Btot += s - mid;
6315 
6316     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6317     for (p = k; p < mid;) {
6318       col = j[p];
6319       q   = p;
6320       do {
6321         p++;
6322       } while (p < mid && j[p] == col);
6323       Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
6324       Annz++;
6325     }
6326 
6327     for (p = mid; p < s;) {
6328       col = j[p];
6329       q   = p;
6330       do {
6331         p++;
6332       } while (p < s && j[p] == col);
6333       Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
6334       Bnnz++;
6335     }
6336   }
6337   /* Output */
6338   *Aperm_ = Aperm;
6339   *Annz_  = Annz;
6340   *Atot_  = Atot;
6341   *Ajmap_ = Ajmap;
6342   *Bperm_ = Bperm;
6343   *Bnnz_  = Bnnz;
6344   *Btot_  = Btot;
6345   *Bjmap_ = Bjmap;
6346   PetscFunctionReturn(PETSC_SUCCESS);
6347 }
6348 
6349 /*
6350   Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6351 
6352   Input Parameters:
6353     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6354     nnz:  number of unique nonzeros in the merged matrix
6355     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6356     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6357 
6358   Output Parameter: (memory is allocated by the caller)
6359     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6360 
6361   Example:
6362     nnz1 = 4
6363     nnz  = 6
6364     imap = [1,3,4,5]
6365     jmap = [0,3,5,6,7]
6366    then,
6367     jmap_new = [0,0,3,3,5,6,7]
6368 */
6369 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6370 {
6371   PetscCount k, p;
6372 
6373   PetscFunctionBegin;
6374   jmap_new[0] = 0;
6375   p           = nnz;                /* p loops over jmap_new[] backwards */
6376   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6377     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6378   }
6379   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6380   PetscFunctionReturn(PETSC_SUCCESS);
6381 }
6382 
6383 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data)
6384 {
6385   MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data;
6386 
6387   PetscFunctionBegin;
6388   PetscCall(PetscSFDestroy(&coo->sf));
6389   PetscCall(PetscFree(coo->Aperm1));
6390   PetscCall(PetscFree(coo->Bperm1));
6391   PetscCall(PetscFree(coo->Ajmap1));
6392   PetscCall(PetscFree(coo->Bjmap1));
6393   PetscCall(PetscFree(coo->Aimap2));
6394   PetscCall(PetscFree(coo->Bimap2));
6395   PetscCall(PetscFree(coo->Aperm2));
6396   PetscCall(PetscFree(coo->Bperm2));
6397   PetscCall(PetscFree(coo->Ajmap2));
6398   PetscCall(PetscFree(coo->Bjmap2));
6399   PetscCall(PetscFree(coo->Cperm1));
6400   PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
6401   PetscCall(PetscFree(coo));
6402   PetscFunctionReturn(PETSC_SUCCESS);
6403 }
6404 
6405 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6406 {
6407   MPI_Comm             comm;
6408   PetscMPIInt          rank, size;
6409   PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6410   PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
6411   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6412   PetscContainer       container;
6413   MatCOOStruct_MPIAIJ *coo;
6414 
6415   PetscFunctionBegin;
6416   PetscCall(PetscFree(mpiaij->garray));
6417   PetscCall(VecDestroy(&mpiaij->lvec));
6418 #if defined(PETSC_USE_CTABLE)
6419   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6420 #else
6421   PetscCall(PetscFree(mpiaij->colmap));
6422 #endif
6423   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6424   mat->assembled     = PETSC_FALSE;
6425   mat->was_assembled = PETSC_FALSE;
6426 
6427   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6428   PetscCallMPI(MPI_Comm_size(comm, &size));
6429   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6430   PetscCall(PetscLayoutSetUp(mat->rmap));
6431   PetscCall(PetscLayoutSetUp(mat->cmap));
6432   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6433   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6434   PetscCall(MatGetLocalSize(mat, &m, &n));
6435   PetscCall(MatGetSize(mat, &M, &N));
6436 
6437   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6438   /* entries come first, then local rows, then remote rows.                     */
6439   PetscCount n1 = coo_n, *perm1;
6440   PetscInt  *i1 = coo_i, *j1 = coo_j;
6441 
6442   PetscCall(PetscMalloc1(n1, &perm1));
6443   for (k = 0; k < n1; k++) perm1[k] = k;
6444 
6445   /* Manipulate indices so that entries with negative row or col indices will have smallest
6446      row indices, local entries will have greater but negative row indices, and remote entries
6447      will have positive row indices.
6448   */
6449   for (k = 0; k < n1; k++) {
6450     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT;                /* e.g., -2^31, minimal to move them ahead */
6451     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6452     else {
6453       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6454       if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6455     }
6456   }
6457 
6458   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6459   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6460 
6461   /* Advance k to the first entry we need to take care of */
6462   for (k = 0; k < n1; k++)
6463     if (i1[k] > PETSC_MIN_INT) break;
6464   PetscInt i1start = k;
6465 
6466   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */
6467   for (; k < rem; k++) i1[k] += PETSC_MAX_INT;                                    /* Revert row indices of local rows*/
6468 
6469   /*           Send remote rows to their owner                                  */
6470   /* Find which rows should be sent to which remote ranks*/
6471   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6472   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6473   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6474   const PetscInt *ranges;
6475   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6476 
6477   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6478   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6479   for (k = rem; k < n1;) {
6480     PetscMPIInt owner;
6481     PetscInt    firstRow, lastRow;
6482 
6483     /* Locate a row range */
6484     firstRow = i1[k]; /* first row of this owner */
6485     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6486     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6487 
6488     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6489     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6490 
6491     /* All entries in [k,p) belong to this remote owner */
6492     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6493       PetscMPIInt *sendto2;
6494       PetscInt    *nentries2;
6495       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6496 
6497       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6498       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6499       PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1));
6500       PetscCall(PetscFree2(sendto, nentries2));
6501       sendto   = sendto2;
6502       nentries = nentries2;
6503       maxNsend = maxNsend2;
6504     }
6505     sendto[nsend]   = owner;
6506     nentries[nsend] = p - k;
6507     PetscCall(PetscCountCast(p - k, &nentries[nsend]));
6508     nsend++;
6509     k = p;
6510   }
6511 
6512   /* Build 1st SF to know offsets on remote to send data */
6513   PetscSF      sf1;
6514   PetscInt     nroots = 1, nroots2 = 0;
6515   PetscInt     nleaves = nsend, nleaves2 = 0;
6516   PetscInt    *offsets;
6517   PetscSFNode *iremote;
6518 
6519   PetscCall(PetscSFCreate(comm, &sf1));
6520   PetscCall(PetscMalloc1(nsend, &iremote));
6521   PetscCall(PetscMalloc1(nsend, &offsets));
6522   for (k = 0; k < nsend; k++) {
6523     iremote[k].rank  = sendto[k];
6524     iremote[k].index = 0;
6525     nleaves2 += nentries[k];
6526     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6527   }
6528   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6529   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6530   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6531   PetscCall(PetscSFDestroy(&sf1));
6532   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem);
6533 
6534   /* Build 2nd SF to send remote COOs to their owner */
6535   PetscSF sf2;
6536   nroots  = nroots2;
6537   nleaves = nleaves2;
6538   PetscCall(PetscSFCreate(comm, &sf2));
6539   PetscCall(PetscSFSetFromOptions(sf2));
6540   PetscCall(PetscMalloc1(nleaves, &iremote));
6541   p = 0;
6542   for (k = 0; k < nsend; k++) {
6543     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6544     for (q = 0; q < nentries[k]; q++, p++) {
6545       iremote[p].rank  = sendto[k];
6546       iremote[p].index = offsets[k] + q;
6547     }
6548   }
6549   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6550 
6551   /* Send the remote COOs to their owner */
6552   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6553   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6554   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6555   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6556   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE));
6557   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6558   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE));
6559 
6560   PetscCall(PetscFree(offsets));
6561   PetscCall(PetscFree2(sendto, nentries));
6562 
6563   /* Sort received COOs by row along with the permutation array     */
6564   for (k = 0; k < n2; k++) perm2[k] = k;
6565   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6566 
6567   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6568   PetscCount *Cperm1;
6569   PetscCall(PetscMalloc1(nleaves, &Cperm1));
6570   PetscCall(PetscArraycpy(Cperm1, perm1 + rem, nleaves));
6571 
6572   /* Support for HYPRE matrices, kind of a hack.
6573      Swap min column with diagonal so that diagonal values will go first */
6574   PetscBool   hypre;
6575   const char *name;
6576   PetscCall(PetscObjectGetName((PetscObject)mat, &name));
6577   PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", name, &hypre));
6578   if (hypre) {
6579     PetscInt *minj;
6580     PetscBT   hasdiag;
6581 
6582     PetscCall(PetscBTCreate(m, &hasdiag));
6583     PetscCall(PetscMalloc1(m, &minj));
6584     for (k = 0; k < m; k++) minj[k] = PETSC_MAX_INT;
6585     for (k = i1start; k < rem; k++) {
6586       if (j1[k] < cstart || j1[k] >= cend) continue;
6587       const PetscInt rindex = i1[k] - rstart;
6588       if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6589       minj[rindex] = PetscMin(minj[rindex], j1[k]);
6590     }
6591     for (k = 0; k < n2; k++) {
6592       if (j2[k] < cstart || j2[k] >= cend) continue;
6593       const PetscInt rindex = i2[k] - rstart;
6594       if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6595       minj[rindex] = PetscMin(minj[rindex], j2[k]);
6596     }
6597     for (k = i1start; k < rem; k++) {
6598       const PetscInt rindex = i1[k] - rstart;
6599       if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6600       if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart);
6601       else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex];
6602     }
6603     for (k = 0; k < n2; k++) {
6604       const PetscInt rindex = i2[k] - rstart;
6605       if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6606       if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart);
6607       else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex];
6608     }
6609     PetscCall(PetscBTDestroy(&hasdiag));
6610     PetscCall(PetscFree(minj));
6611   }
6612 
6613   /* Split local COOs and received COOs into diag/offdiag portions */
6614   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6615   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1;
6616   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6617   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6618   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6619   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6620 
6621   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6622   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6623   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6624   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6625 
6626   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6627   PetscInt *Ai, *Bi;
6628   PetscInt *Aj, *Bj;
6629 
6630   PetscCall(PetscMalloc1(m + 1, &Ai));
6631   PetscCall(PetscMalloc1(m + 1, &Bi));
6632   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6633   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6634 
6635   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6636   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6637   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6638   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6639   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6640 
6641   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6642   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6643 
6644   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6645   /* expect nonzeros in A/B most likely have local contributing entries        */
6646   PetscInt    Annz = Ai[m];
6647   PetscInt    Bnnz = Bi[m];
6648   PetscCount *Ajmap1_new, *Bjmap1_new;
6649 
6650   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6651   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6652 
6653   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6654   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6655 
6656   PetscCall(PetscFree(Aimap1));
6657   PetscCall(PetscFree(Ajmap1));
6658   PetscCall(PetscFree(Bimap1));
6659   PetscCall(PetscFree(Bjmap1));
6660   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6661   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6662   PetscCall(PetscFree(perm1));
6663   PetscCall(PetscFree3(i2, j2, perm2));
6664 
6665   Ajmap1 = Ajmap1_new;
6666   Bjmap1 = Bjmap1_new;
6667 
6668   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6669   if (Annz < Annz1 + Annz2) {
6670     PetscInt *Aj_new;
6671     PetscCall(PetscMalloc1(Annz, &Aj_new));
6672     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6673     PetscCall(PetscFree(Aj));
6674     Aj = Aj_new;
6675   }
6676 
6677   if (Bnnz < Bnnz1 + Bnnz2) {
6678     PetscInt *Bj_new;
6679     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6680     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6681     PetscCall(PetscFree(Bj));
6682     Bj = Bj_new;
6683   }
6684 
6685   /* Create new submatrices for on-process and off-process coupling                  */
6686   PetscScalar     *Aa, *Ba;
6687   MatType          rtype;
6688   Mat_SeqAIJ      *a, *b;
6689   PetscObjectState state;
6690   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6691   PetscCall(PetscCalloc1(Bnnz, &Ba));
6692   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6693   if (cstart) {
6694     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6695   }
6696   PetscCall(MatDestroy(&mpiaij->A));
6697   PetscCall(MatDestroy(&mpiaij->B));
6698   PetscCall(MatGetRootType_Private(mat, &rtype));
6699   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6700   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6701   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6702   mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ
6703   state              = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate;
6704   PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
6705 
6706   a               = (Mat_SeqAIJ *)mpiaij->A->data;
6707   b               = (Mat_SeqAIJ *)mpiaij->B->data;
6708   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6709   a->free_a = b->free_a = PETSC_TRUE;
6710   a->free_ij = b->free_ij = PETSC_TRUE;
6711 
6712   /* conversion must happen AFTER multiply setup */
6713   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6714   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6715   PetscCall(VecDestroy(&mpiaij->lvec));
6716   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6717 
6718   // Put the COO struct in a container and then attach that to the matrix
6719   PetscCall(PetscMalloc1(1, &coo));
6720   coo->n       = coo_n;
6721   coo->sf      = sf2;
6722   coo->sendlen = nleaves;
6723   coo->recvlen = nroots;
6724   coo->Annz    = Annz;
6725   coo->Bnnz    = Bnnz;
6726   coo->Annz2   = Annz2;
6727   coo->Bnnz2   = Bnnz2;
6728   coo->Atot1   = Atot1;
6729   coo->Atot2   = Atot2;
6730   coo->Btot1   = Btot1;
6731   coo->Btot2   = Btot2;
6732   coo->Ajmap1  = Ajmap1;
6733   coo->Aperm1  = Aperm1;
6734   coo->Bjmap1  = Bjmap1;
6735   coo->Bperm1  = Bperm1;
6736   coo->Aimap2  = Aimap2;
6737   coo->Ajmap2  = Ajmap2;
6738   coo->Aperm2  = Aperm2;
6739   coo->Bimap2  = Bimap2;
6740   coo->Bjmap2  = Bjmap2;
6741   coo->Bperm2  = Bperm2;
6742   coo->Cperm1  = Cperm1;
6743   // Allocate in preallocation. If not used, it has zero cost on host
6744   PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
6745   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
6746   PetscCall(PetscContainerSetPointer(container, coo));
6747   PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ));
6748   PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
6749   PetscCall(PetscContainerDestroy(&container));
6750   PetscFunctionReturn(PETSC_SUCCESS);
6751 }
6752 
6753 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6754 {
6755   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6756   Mat                  A = mpiaij->A, B = mpiaij->B;
6757   PetscScalar         *Aa, *Ba;
6758   PetscScalar         *sendbuf, *recvbuf;
6759   const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
6760   const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
6761   const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
6762   const PetscCount    *Cperm1;
6763   PetscContainer       container;
6764   MatCOOStruct_MPIAIJ *coo;
6765 
6766   PetscFunctionBegin;
6767   PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
6768   PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix");
6769   PetscCall(PetscContainerGetPointer(container, (void **)&coo));
6770   sendbuf = coo->sendbuf;
6771   recvbuf = coo->recvbuf;
6772   Ajmap1  = coo->Ajmap1;
6773   Ajmap2  = coo->Ajmap2;
6774   Aimap2  = coo->Aimap2;
6775   Bjmap1  = coo->Bjmap1;
6776   Bjmap2  = coo->Bjmap2;
6777   Bimap2  = coo->Bimap2;
6778   Aperm1  = coo->Aperm1;
6779   Aperm2  = coo->Aperm2;
6780   Bperm1  = coo->Bperm1;
6781   Bperm2  = coo->Bperm2;
6782   Cperm1  = coo->Cperm1;
6783 
6784   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6785   PetscCall(MatSeqAIJGetArray(B, &Ba));
6786 
6787   /* Pack entries to be sent to remote */
6788   for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6789 
6790   /* Send remote entries to their owner and overlap the communication with local computation */
6791   PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6792   /* Add local entries to A and B */
6793   for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6794     PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
6795     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6796     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6797   }
6798   for (PetscCount i = 0; i < coo->Bnnz; i++) {
6799     PetscScalar sum = 0.0;
6800     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6801     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6802   }
6803   PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6804 
6805   /* Add received remote entries to A and B */
6806   for (PetscCount i = 0; i < coo->Annz2; i++) {
6807     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6808   }
6809   for (PetscCount i = 0; i < coo->Bnnz2; i++) {
6810     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6811   }
6812   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6813   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6814   PetscFunctionReturn(PETSC_SUCCESS);
6815 }
6816 
6817 /*MC
6818    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6819 
6820    Options Database Keys:
6821 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6822 
6823    Level: beginner
6824 
6825    Notes:
6826    `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values,
6827     in this case the values associated with the rows and columns one passes in are set to zero
6828     in the matrix
6829 
    `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
6832 
6833 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6834 M*/
6835 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6836 {
6837   Mat_MPIAIJ *b;
6838   PetscMPIInt size;
6839 
6840   PetscFunctionBegin;
6841   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
6842 
6843   PetscCall(PetscNew(&b));
6844   B->data       = (void *)b;
6845   B->ops[0]     = MatOps_Values;
6846   B->assembled  = PETSC_FALSE;
6847   B->insertmode = NOT_SET_VALUES;
6848   b->size       = size;
6849 
6850   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
6851 
6852   /* build cache for off array entries formed */
6853   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
6854 
6855   b->donotstash  = PETSC_FALSE;
6856   b->colmap      = NULL;
6857   b->garray      = NULL;
6858   b->roworiented = PETSC_TRUE;
6859 
6860   /* stuff used for matrix vector multiply */
6861   b->lvec  = NULL;
6862   b->Mvctx = NULL;
6863 
6864   /* stuff for MatGetRow() */
6865   b->rowindices   = NULL;
6866   b->rowvalues    = NULL;
6867   b->getrowactive = PETSC_FALSE;
6868 
6869   /* flexible pointer used in CUSPARSE classes */
6870   b->spptr = NULL;
6871 
6872   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
6873   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
6874   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
6875   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
6876   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
6877   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
6878   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
6879   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
6880   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
6881   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
6882 #if defined(PETSC_HAVE_CUDA)
6883   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
6884 #endif
6885 #if defined(PETSC_HAVE_HIP)
6886   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
6887 #endif
6888 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6889   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
6890 #endif
6891 #if defined(PETSC_HAVE_MKL_SPARSE)
6892   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
6893 #endif
6894   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
6895   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
6896   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
6897   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
6898 #if defined(PETSC_HAVE_ELEMENTAL)
6899   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
6900 #endif
6901 #if defined(PETSC_HAVE_SCALAPACK)
6902   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
6903 #endif
6904   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
6905   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
6906 #if defined(PETSC_HAVE_HYPRE)
6907   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
6908   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
6909 #endif
6910   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
6911   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
6912   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
6913   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
6914   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
6915   PetscFunctionReturn(PETSC_SUCCESS);
6916 }
6917 
6918 /*@C
6919   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6920   and "off-diagonal" part of the matrix in CSR format.
6921 
6922   Collective
6923 
6924   Input Parameters:
6925 + comm - MPI communicator
6926 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
6927 . n    - This value should be the same as the local size used in creating the
6928        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
6929        calculated if `N` is given) For square matrices `n` is almost always `m`.
6930 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
6931 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
6932 . i    - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6933 . j    - column indices, which must be local, i.e., based off the start column of the diagonal portion
6934 . a    - matrix values
6935 . oi   - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6936 . oj   - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
6937 - oa   - matrix values
6938 
6939   Output Parameter:
6940 . mat - the matrix
6941 
6942   Level: advanced
6943 
6944   Notes:
6945   The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6946   must free the arrays once the matrix has been destroyed and not before.
6947 
6948   The `i` and `j` indices are 0 based
6949 
6950   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix
6951 
6952   This sets local rows and cannot be used to set off-processor values.
6953 
6954   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6955   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6956   not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6957   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6958   keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6959   communication if it is known that only local entries will be set.
6960 
6961 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6962           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6963 @*/
6964 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
6965 {
6966   Mat_MPIAIJ *maij;
6967 
6968   PetscFunctionBegin;
6969   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
6970   PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
6971   PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
6972   PetscCall(MatCreate(comm, mat));
6973   PetscCall(MatSetSizes(*mat, m, n, M, N));
6974   PetscCall(MatSetType(*mat, MATMPIAIJ));
6975   maij = (Mat_MPIAIJ *)(*mat)->data;
6976 
6977   (*mat)->preallocated = PETSC_TRUE;
6978 
6979   PetscCall(PetscLayoutSetUp((*mat)->rmap));
6980   PetscCall(PetscLayoutSetUp((*mat)->cmap));
6981 
6982   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
6983   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));
6984 
6985   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
6986   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
6987   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
6988   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
6989   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
6990   PetscFunctionReturn(PETSC_SUCCESS);
6991 }
6992 
6993 typedef struct {
6994   Mat       *mp;    /* intermediate products */
6995   PetscBool *mptmp; /* is the intermediate product temporary ? */
6996   PetscInt   cp;    /* number of intermediate products */
6997 
6998   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
6999   PetscInt    *startsj_s, *startsj_r;
7000   PetscScalar *bufa;
7001   Mat          P_oth;
7002 
7003   /* may take advantage of merging product->B */
7004   Mat Bloc; /* B-local by merging diag and off-diag */
7005 
7006   /* cusparse does not have support to split between symbolic and numeric phases.
7007      When api_user is true, we don't need to update the numerical values
7008      of the temporary storage */
7009   PetscBool reusesym;
7010 
7011   /* support for COO values insertion */
7012   PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
7013   PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
7014   PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
7015   PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
7016   PetscSF      sf;            /* used for non-local values insertion and memory malloc */
7017   PetscMemType mtype;
7018 
7019   /* customization */
7020   PetscBool abmerge;
7021   PetscBool P_oth_bind;
7022 } MatMatMPIAIJBACKEND;
7023 
7024 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
7025 {
7026   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
7027   PetscInt             i;
7028 
7029   PetscFunctionBegin;
7030   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
7031   PetscCall(PetscFree(mmdata->bufa));
7032   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
7033   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
7034   PetscCall(MatDestroy(&mmdata->P_oth));
7035   PetscCall(MatDestroy(&mmdata->Bloc));
7036   PetscCall(PetscSFDestroy(&mmdata->sf));
7037   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
7038   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
7039   PetscCall(PetscFree(mmdata->own[0]));
7040   PetscCall(PetscFree(mmdata->own));
7041   PetscCall(PetscFree(mmdata->off[0]));
7042   PetscCall(PetscFree(mmdata->off));
7043   PetscCall(PetscFree(mmdata));
7044   PetscFunctionReturn(PETSC_SUCCESS);
7045 }
7046 
7047 /* Copy selected n entries with indices in idx[] of A to v[].
7048    If idx is NULL, copy the whole data array of A to v[]
7049  */
7050 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
7051 {
7052   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
7053 
7054   PetscFunctionBegin;
7055   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7056   if (f) {
7057     PetscCall((*f)(A, n, idx, v));
7058   } else {
7059     const PetscScalar *vv;
7060 
7061     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7062     if (n && idx) {
7063       PetscScalar    *w  = v;
7064       const PetscInt *oi = idx;
7065       PetscInt        j;
7066 
7067       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7068     } else {
7069       PetscCall(PetscArraycpy(v, vv, n));
7070     }
7071     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7072   }
7073   PetscFunctionReturn(PETSC_SUCCESS);
7074 }
7075 
7076 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
7077 {
7078   MatMatMPIAIJBACKEND *mmdata;
7079   PetscInt             i, n_d, n_o;
7080 
7081   PetscFunctionBegin;
7082   MatCheckProduct(C, 1);
7083   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
7084   mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
7085   if (!mmdata->reusesym) { /* update temporary matrices */
7086     if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7087     if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
7088   }
7089   mmdata->reusesym = PETSC_FALSE;
7090 
7091   for (i = 0; i < mmdata->cp; i++) {
7092     PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
7093     PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
7094   }
7095   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
7096     PetscInt noff = mmdata->off[i + 1] - mmdata->off[i];
7097 
7098     if (mmdata->mptmp[i]) continue;
7099     if (noff) {
7100       PetscInt nown = mmdata->own[i + 1] - mmdata->own[i];
7101 
7102       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
7103       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
7104       n_o += noff;
7105       n_d += nown;
7106     } else {
7107       Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;
7108 
7109       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
7110       n_d += mm->nz;
7111     }
7112   }
7113   if (mmdata->hasoffproc) { /* offprocess insertion */
7114     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7115     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7116   }
7117   PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
7118   PetscFunctionReturn(PETSC_SUCCESS);
7119 }
7120 
/*
   MatProductSymbolic_MPIAIJBACKEND - symbolic phase of C = A*P (MATPRODUCT_AB), P^T*A (MATPRODUCT_AtB)
   or P^T*A*P (MATPRODUCT_PtAP), assembled from a few intermediate products between local blocks.

   Input/Output Parameter:
.  C - the product matrix; C->product provides the operands, the product type and the fill

   Outline:
   1) select A, P and the sizes of C from the product type; AtB with product->A flagged symmetric is handled as AB;
   2) allocate the MatMatMPIAIJBACKEND context and read the backend options into it;
   3) create the intermediate products mp[], run their symbolic phase and record, for each of them, how its
      local row/column numbering maps to the global numbering of C (rmapt/cmapt and rmapa/cmapa);
   4) attach the context to C->product and install the numeric and destroy callbacks;
   5) turn the nonzero patterns of the non-temporary intermediate products into COO (i,j) lists, gathering
      through a PetscSF the entries other processes insert here, and call MatSetPreallocationCOO() on C.

   Errors with PETSC_ERR_PLIB when C already has product data or the product type is not one of the three above.
*/
7121 /* Support for Pt * A, A * P, or Pt * A * P */
7122 #define MAX_NUMBER_INTERMEDIATE 4
7123 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
7124 {
7125   Mat_Product           *product = C->product;
7126   Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
7127   Mat_MPIAIJ            *a, *p;
7128   MatMatMPIAIJBACKEND   *mmdata;
7129   ISLocalToGlobalMapping P_oth_l2g = NULL;
7130   IS                     glob      = NULL;
7131   const char            *prefix;
7132   char                   pprefix[256];
7133   const PetscInt        *globidx, *P_oth_idx;
7134   PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
7135   PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
7136   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
7137                                                                                          /* type-0: consecutive, start from 0; type-1: consecutive with */
7138                                                                                          /* a base offset; type-2: sparse with a local to global map table */
7139   const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */
7140
7141   MatProductType ptype;
7142   PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
7143   PetscMPIInt    size;
7144
7145   PetscFunctionBegin;
7146   MatCheckProduct(C, 1);
7147   PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
7148   ptype = product->type;
       /* A^T * B with A flagged symmetric is computed as A * B; the product remembers that symmetry was relied upon */
7149   if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
7150     ptype                                          = MATPRODUCT_AB;
7151     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
7152   }
       /* choose A, P and the local (m,n) / global (M,N) sizes of C; note that for AtB the roles are P = product->A, A = product->B */
7153   switch (ptype) {
7154   case MATPRODUCT_AB:
7155     A          = product->A;
7156     P          = product->B;
7157     m          = A->rmap->n;
7158     n          = P->cmap->n;
7159     M          = A->rmap->N;
7160     N          = P->cmap->N;
7161     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
7162     break;
7163   case MATPRODUCT_AtB:
7164     P          = product->A;
7165     A          = product->B;
7166     m          = P->cmap->n;
7167     n          = A->cmap->n;
7168     M          = P->cmap->N;
7169     N          = A->cmap->N;
7170     hasoffproc = PETSC_TRUE;
7171     break;
7172   case MATPRODUCT_PtAP:
7173     A          = product->A;
7174     P          = product->B;
7175     m          = P->cmap->n;
7176     n          = P->cmap->n;
7177     M          = P->cmap->N;
7178     N          = P->cmap->N;
7179     hasoffproc = PETSC_TRUE;
7180     break;
7181   default:
7182     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7183   }
7184   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
       /* on a single process no entry can belong to another process */
7185   if (size == 1) hasoffproc = PETSC_FALSE;
7186
7187   /* defaults */
7188   for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
7189     mp[i]    = NULL;
7190     mptmp[i] = PETSC_FALSE;
7191     rmapt[i] = -1;
7192     cmapt[i] = -1;
7193     rmapa[i] = NULL;
7194     cmapa[i] = NULL;
7195   }
7196
7197   /* customization */
7198   PetscCall(PetscNew(&mmdata));
       /* reusesym is consumed by the first call to MatProductNumeric_MPIAIJBACKEND, which then resets it to PETSC_FALSE */
7199   mmdata->reusesym = product->api_user;
7200   if (ptype == MATPRODUCT_AB) {
7201     if (product->api_user) {
7202       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
7203       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7204       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7205       PetscOptionsEnd();
7206     } else {
7207       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
7208       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7209       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7210       PetscOptionsEnd();
7211     }
7212   } else if (ptype == MATPRODUCT_PtAP) {
7213     if (product->api_user) {
7214       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
7215       PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7216       PetscOptionsEnd();
7217     } else {
7218       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
7219       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7220       PetscOptionsEnd();
7221     }
7222   }
7223   a = (Mat_MPIAIJ *)A->data;
7224   p = (Mat_MPIAIJ *)P->data;
7225   PetscCall(MatSetSizes(C, m, n, M, N));
7226   PetscCall(PetscLayoutSetUp(C->rmap));
7227   PetscCall(PetscLayoutSetUp(C->cmap));
       /* C gets the same matrix type as A */
7228   PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
7229   PetscCall(MatGetOptionsPrefix(C, &prefix));
7230
       /* Create the intermediate products. Each of them gets the options prefix of C followed by "backend_p<cp>_",
          inherits api_user, and has its symbolic phase run right away. cp counts the products created so far */
7231   cp = 0;
7232   switch (ptype) {
7233   case MATPRODUCT_AB: /* A * P */
7234     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7235
7236     /* A_diag * P_local (merged or not) */
7237     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7238       /* P is product->B */
7239       PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7240       PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7241       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7242       PetscCall(MatProductSetFill(mp[cp], product->fill));
7243       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7244       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7245       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7246       mp[cp]->product->api_user = product->api_user;
7247       PetscCall(MatProductSetFromOptions(mp[cp]));
7248       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7249       PetscCall(ISGetIndices(glob, &globidx));
7250       rmapt[cp] = 1;
7251       cmapt[cp] = 2;
7252       cmapa[cp] = globidx;
7253       mptmp[cp] = PETSC_FALSE;
7254       cp++;
7255     } else { /* A_diag * P_diag and A_diag * P_off */
7256       PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
7257       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7258       PetscCall(MatProductSetFill(mp[cp], product->fill));
7259       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7260       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7261       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7262       mp[cp]->product->api_user = product->api_user;
7263       PetscCall(MatProductSetFromOptions(mp[cp]));
7264       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7265       rmapt[cp] = 1;
7266       cmapt[cp] = 1;
7267       mptmp[cp] = PETSC_FALSE;
7268       cp++;
7269       PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
7270       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7271       PetscCall(MatProductSetFill(mp[cp], product->fill));
7272       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7273       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7274       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7275       mp[cp]->product->api_user = product->api_user;
7276       PetscCall(MatProductSetFromOptions(mp[cp]));
7277       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7278       rmapt[cp] = 1;
7279       cmapt[cp] = 2;
7280       cmapa[cp] = p->garray;
7281       mptmp[cp] = PETSC_FALSE;
7282       cp++;
7283     }
7284
7285     /* A_off * P_other */
7286     if (mmdata->P_oth) {
7287       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
7288       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7289       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
7290       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7291       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7292       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7293       PetscCall(MatProductSetFill(mp[cp], product->fill));
7294       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7295       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7296       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7297       mp[cp]->product->api_user = product->api_user;
7298       PetscCall(MatProductSetFromOptions(mp[cp]));
7299       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7300       rmapt[cp] = 1;
7301       cmapt[cp] = 2;
7302       cmapa[cp] = P_oth_idx;
7303       mptmp[cp] = PETSC_FALSE;
7304       cp++;
7305     }
7306     break;
7307
7308   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7309     /* A is product->B */
7310     PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7311     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7312       PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
7313       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7314       PetscCall(MatProductSetFill(mp[cp], product->fill));
7315       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7316       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7317       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7318       mp[cp]->product->api_user = product->api_user;
7319       PetscCall(MatProductSetFromOptions(mp[cp]));
7320       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7321       PetscCall(ISGetIndices(glob, &globidx));
7322       rmapt[cp] = 2;
7323       rmapa[cp] = globidx;
7324       cmapt[cp] = 2;
7325       cmapa[cp] = globidx;
7326       mptmp[cp] = PETSC_FALSE;
7327       cp++;
7328     } else {
7329       PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
7330       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7331       PetscCall(MatProductSetFill(mp[cp], product->fill));
7332       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7333       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7334       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7335       mp[cp]->product->api_user = product->api_user;
7336       PetscCall(MatProductSetFromOptions(mp[cp]));
7337       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7338       PetscCall(ISGetIndices(glob, &globidx));
7339       rmapt[cp] = 1;
7340       cmapt[cp] = 2;
7341       cmapa[cp] = globidx;
7342       mptmp[cp] = PETSC_FALSE;
7343       cp++;
7344       PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
7345       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7346       PetscCall(MatProductSetFill(mp[cp], product->fill));
7347       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7348       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7349       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7350       mp[cp]->product->api_user = product->api_user;
7351       PetscCall(MatProductSetFromOptions(mp[cp]));
7352       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7353       rmapt[cp] = 2;
7354       rmapa[cp] = p->garray;
7355       cmapt[cp] = 2;
7356       cmapa[cp] = globidx;
7357       mptmp[cp] = PETSC_FALSE;
7358       cp++;
7359     }
7360     break;
7361   case MATPRODUCT_PtAP:
7362     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7363     /* P is product->B */
7364     PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
         /* mp[0] = P_loc^t * A_diag * P_loc, computed as one PtAP of the local matrices */
7365     PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7366     PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
7367     PetscCall(MatProductSetFill(mp[cp], product->fill));
7368     PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7369     PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7370     PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7371     mp[cp]->product->api_user = product->api_user;
7372     PetscCall(MatProductSetFromOptions(mp[cp]));
7373     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7374     PetscCall(ISGetIndices(glob, &globidx));
7375     rmapt[cp] = 2;
7376     rmapa[cp] = globidx;
7377     cmapt[cp] = 2;
7378     cmapa[cp] = globidx;
7379     mptmp[cp] = PETSC_FALSE;
7380     cp++;
7381     if (mmdata->P_oth) {
7382       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
7383       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7384       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
7385       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
           /* mp[1] = A_off * P_oth: temporary, it is only the right operand of the next product and puts nothing into C itself */
7386       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7387       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7388       PetscCall(MatProductSetFill(mp[cp], product->fill));
7389       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7390       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7391       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7392       mp[cp]->product->api_user = product->api_user;
7393       PetscCall(MatProductSetFromOptions(mp[cp]));
7394       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7395       mptmp[cp] = PETSC_TRUE;
7396       cp++;
           /* mp[2] = P_loc^t * mp[1]; the hard-coded mp[1] is the A_off * P_oth product created just above (cp was 1 there) */
7397       PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
7398       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7399       PetscCall(MatProductSetFill(mp[cp], product->fill));
7400       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7401       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7402       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7403       mp[cp]->product->api_user = product->api_user;
7404       PetscCall(MatProductSetFromOptions(mp[cp]));
7405       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7406       rmapt[cp] = 2;
7407       rmapa[cp] = globidx;
7408       cmapt[cp] = 2;
7409       cmapa[cp] = P_oth_idx;
7410       mptmp[cp] = PETSC_FALSE;
7411       cp++;
7412     }
7413     break;
7414   default:
7415     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7416   }
7417   /* sanity check */
       /* in parallel, a sparse (type-2) row map is only expected for product types that insert off-process */
7418   if (size > 1)
7419     for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);
7420
       /* move the stack arrays into the context; from here on cp is reused as a loop index and mmdata->cp holds the count */
7421   PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
7422   for (i = 0; i < cp; i++) {
7423     mmdata->mp[i]    = mp[i];
7424     mmdata->mptmp[i] = mptmp[i];
7425   }
7426   mmdata->cp             = cp;
       /* the product now owns mmdata (released through MatDestroy_MatMatMPIAIJBACKEND); own/off/sf/coo_* are filled in below */
7427   C->product->data       = mmdata;
7428   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7429   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7430
7431   /* memory type */
       /* host by default; a CUDA, HIP or Kokkos memory type is chosen when the type of C matches the corresponding AIJ types */
7432   mmdata->mtype = PETSC_MEMTYPE_HOST;
7433   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
7434   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
7435   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
7436   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7437   else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
7438   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7439
7440   /* prepare coo coordinates for values insertion */
7441
7442   /* count total nonzeros of those intermediate seqaij Mats
7443     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7444     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7445     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7446   */
7447   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7448     Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
7449     if (mptmp[cp]) continue;
7450     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
7451       const PetscInt *rmap = rmapa[cp];
7452       const PetscInt  mr   = mp[cp]->rmap->n;
7453       const PetscInt  rs   = C->rmap->rstart;
7454       const PetscInt  re   = C->rmap->rend;
7455       const PetscInt *ii   = mm->i;
7456       for (i = 0; i < mr; i++) {
7457         const PetscInt gr = rmap[i];
7458         const PetscInt nz = ii[i + 1] - ii[i];
7459         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7460         else ncoo_oown += nz;                  /* this row is local */
7461       }
7462     } else ncoo_d += mm->nz;
7463   }
7464
7465   /*
7466     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7467
7468     ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.
7469
7470     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].
7471
7472     off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
7473     own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
7474     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7475
7476     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7477     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
7478   */
7479   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
7480   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));
7481
7482   /* gather (i,j) of nonzeros inserted by remote procs */
7483   if (hasoffproc) {
7484     PetscSF  msf;
7485     PetscInt ncoo2, *coo_i2, *coo_j2;
7486
7487     PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
7488     PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
7489     PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */
7490
         /* ncoo_o is recounted from zero while filling; products that are skipped get empty segments (off[cp+1] == off[cp], own[cp+1] == own[cp]) */
7491     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7492       Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
7493       PetscInt   *idxoff = mmdata->off[cp];
7494       PetscInt   *idxown = mmdata->own[cp];
7495       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7496         const PetscInt *rmap = rmapa[cp];
7497         const PetscInt *cmap = cmapa[cp];
7498         const PetscInt *ii   = mm->i;
7499         PetscInt       *coi  = coo_i + ncoo_o;
7500         PetscInt       *coj  = coo_j + ncoo_o;
7501         const PetscInt  mr   = mp[cp]->rmap->n;
7502         const PetscInt  rs   = C->rmap->rstart;
7503         const PetscInt  re   = C->rmap->rend;
7504         const PetscInt  cs   = C->cmap->rstart;
7505         for (i = 0; i < mr; i++) {
7506           const PetscInt *jj = mm->j + ii[i];
7507           const PetscInt  gr = rmap[i];
7508           const PetscInt  nz = ii[i + 1] - ii[i];
7509           if (gr < rs || gr >= re) { /* this is an offproc row */
                 /* record the global row and the position j of the entry in the value array of mp[cp] */
7510             for (j = ii[i]; j < ii[i + 1]; j++) {
7511               *coi++    = gr;
7512               *idxoff++ = j;
7513             }
7514             if (!cmapt[cp]) { /* already global */
7515               for (j = 0; j < nz; j++) *coj++ = jj[j];
7516             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7517               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7518             } else { /* offdiag */
7519               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7520             }
7521             ncoo_o += nz;
7522           } else { /* this is a local row */
7523             for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
7524           }
7525         }
7526       }
7527       mmdata->off[cp + 1] = idxoff;
7528       mmdata->own[cp + 1] = idxown;
7529     }
7530
         /* the SF has one leaf per entry to send; coo_i holds the global row of each leaf and C->rmap is the layout of those rows */
7531     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7532     PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
7533     PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
7534     PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
7535     ncoo = ncoo_d + ncoo_oown + ncoo2;
7536     PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
7537     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7538     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
7539     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7540     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7541     PetscCall(PetscFree2(coo_i, coo_j));
7542     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7543     PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
         /* from now on coo_i/coo_j are the full-length arrays; their first ncoo_d + ncoo_oown slots are filled by the loop below */
7544     coo_i = coo_i2;
7545     coo_j = coo_j2;
7546   } else { /* no offproc values insertion */
7547     ncoo = ncoo_d;
7548     PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));
7549
         /* an SF with an empty graph is still created: it is used below to allocate coo_v and later to free it */
7550     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7551     PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
7552     PetscCall(PetscSFSetUp(mmdata->sf));
7553   }
7554   mmdata->hasoffproc = hasoffproc;
7555
7556   /* gather (i,j) of nonzeros inserted locally */
       /* Entries are laid out product by product. ncoo_d is reused as the running offset and counts every locally
          inserted entry, including the local rows of sparse-row products. MatProductNumeric_MPIAIJBACKEND packs
          coo_v product by product in the same way */
7557   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7558     Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
7559     PetscInt       *coi  = coo_i + ncoo_d;
7560     PetscInt       *coj  = coo_j + ncoo_d;
7561     const PetscInt *jj   = mm->j;
7562     const PetscInt *ii   = mm->i;
7563     const PetscInt *cmap = cmapa[cp];
7564     const PetscInt *rmap = rmapa[cp];
7565     const PetscInt  mr   = mp[cp]->rmap->n;
7566     const PetscInt  rs   = C->rmap->rstart;
7567     const PetscInt  re   = C->rmap->rend;
7568     const PetscInt  cs   = C->cmap->rstart;
7569
7570     if (mptmp[cp]) continue;
7571     if (rmapt[cp] == 1) { /* consecutive rows */
7572       /* fill coo_i */
           /* coi is indexed with the CSR position j, so the entries keep the CSR ordering of mp[cp] */
7573       for (i = 0; i < mr; i++) {
7574         const PetscInt gr = i + rs;
7575         for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
7576       }
7577       /* fill coo_j */
7578       if (!cmapt[cp]) { /* type-0, already global */
7579         PetscCall(PetscArraycpy(coj, jj, mm->nz));
7580       } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
7581         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7582       } else {                                            /* type-2, local to global for sparse columns */
7583         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7584       }
7585       ncoo_d += mm->nz;
7586     } else if (rmapt[cp] == 2) { /* sparse rows */
           /* only rows owned by this process are listed here; off-process rows were handled in the hasoffproc branch above */
7587       for (i = 0; i < mr; i++) {
7588         const PetscInt *jj = mm->j + ii[i];
7589         const PetscInt  gr = rmap[i];
7590         const PetscInt  nz = ii[i + 1] - ii[i];
7591         if (gr >= rs && gr < re) { /* local rows */
7592           for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
7593           if (!cmapt[cp]) { /* type-0, already global */
7594             for (j = 0; j < nz; j++) *coj++ = jj[j];
7595           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7596             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7597           } else { /* type-2, local to global for sparse columns */
7598             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7599           }
7600           ncoo_d += nz;
7601         }
7602       }
7603     }
7604   }
       /* the local-to-global tables (globidx, P_oth_idx) are not needed past this point */
7605   if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
7606   PetscCall(ISDestroy(&glob));
7607   if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
7608   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7609   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7610   PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));
7611
7612   /* preallocate with COO data */
7613   PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
7614   PetscCall(PetscFree2(coo_i, coo_j));
7615   PetscFunctionReturn(PETSC_SUCCESS);
7616 }
7617 
7618 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7619 {
7620   Mat_Product *product = mat->product;
7621 #if defined(PETSC_HAVE_DEVICE)
7622   PetscBool match  = PETSC_FALSE;
7623   PetscBool usecpu = PETSC_FALSE;
7624 #else
7625   PetscBool match = PETSC_TRUE;
7626 #endif
7627 
7628   PetscFunctionBegin;
7629   MatCheckProduct(mat, 1);
7630 #if defined(PETSC_HAVE_DEVICE)
7631   if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
7632   if (match) { /* we can always fallback to the CPU if requested */
7633     switch (product->type) {
7634     case MATPRODUCT_AB:
7635       if (product->api_user) {
7636         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
7637         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7638         PetscOptionsEnd();
7639       } else {
7640         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
7641         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7642         PetscOptionsEnd();
7643       }
7644       break;
7645     case MATPRODUCT_AtB:
7646       if (product->api_user) {
7647         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
7648         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7649         PetscOptionsEnd();
7650       } else {
7651         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
7652         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7653         PetscOptionsEnd();
7654       }
7655       break;
7656     case MATPRODUCT_PtAP:
7657       if (product->api_user) {
7658         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
7659         PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7660         PetscOptionsEnd();
7661       } else {
7662         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
7663         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7664         PetscOptionsEnd();
7665       }
7666       break;
7667     default:
7668       break;
7669     }
7670     match = (PetscBool)!usecpu;
7671   }
7672 #endif
7673   if (match) {
7674     switch (product->type) {
7675     case MATPRODUCT_AB:
7676     case MATPRODUCT_AtB:
7677     case MATPRODUCT_PtAP:
7678       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7679       break;
7680     default:
7681       break;
7682     }
7683   }
7684   /* fallback to MPIAIJ ops */
7685   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7686   PetscFunctionReturn(PETSC_SUCCESS);
7687 }
7688 
7689 /*
7690    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7691 
7692    n - the number of block indices in cc[]
7693    cc - the block indices (must be large enough to contain the indices)
7694 */
7695 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7696 {
7697   PetscInt        cnt = -1, nidx, j;
7698   const PetscInt *idx;
7699 
7700   PetscFunctionBegin;
7701   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7702   if (nidx) {
7703     cnt     = 0;
7704     cc[cnt] = idx[0] / bs;
7705     for (j = 1; j < nidx; j++) {
7706       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7707     }
7708   }
7709   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7710   *n = cnt + 1;
7711   PetscFunctionReturn(PETSC_SUCCESS);
7712 }
7713 
7714 /*
7715     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7716 
7717     ncollapsed - the number of block indices
7718     collapsed - the block indices (must be large enough to contain the indices)
7719 */
7720 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7721 {
7722   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7723 
7724   PetscFunctionBegin;
7725   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7726   for (i = start + 1; i < start + bs; i++) {
7727     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7728     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7729     cprevtmp = cprev;
7730     cprev    = merged;
7731     merged   = cprevtmp;
7732   }
7733   *ncollapsed = nprev;
7734   if (collapsed) *collapsed = cprev;
7735   PetscFunctionReturn(PETSC_SUCCESS);
7736 }
7737 
7738 /*
7739    This will eventually be folded into MatCreateGraph_AIJ() for optimal performance
7740 */
7741 static PetscErrorCode MatFilter_AIJ(Mat Gmat, PetscReal vfilter, Mat *filteredG)
7742 {
7743   PetscInt           Istart, Iend, ncols, nnz0, nnz1, NN, MM, nloc;
7744   Mat                tGmat;
7745   MPI_Comm           comm;
7746   const PetscScalar *vals;
7747   const PetscInt    *idx;
7748   PetscInt          *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols = 0;
7749   MatScalar         *AA; // this is checked in graph
7750   PetscBool          isseqaij;
7751   Mat                a, b, c;
7752   MatType            jtype;
7753 
7754   PetscFunctionBegin;
7755   PetscCall(PetscObjectGetComm((PetscObject)Gmat, &comm));
7756   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATSEQAIJ, &isseqaij));
7757   PetscCall(MatGetType(Gmat, &jtype));
7758   PetscCall(MatCreate(comm, &tGmat));
7759   PetscCall(MatSetType(tGmat, jtype));
7760 
7761   /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold?
7762                Also, if the matrix is symmetric, can we skip this
7763                operation? It can be very expensive on large matrices. */
7764 
7765   // global sizes
7766   PetscCall(MatGetSize(Gmat, &MM, &NN));
7767   PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend));
7768   nloc = Iend - Istart;
7769   PetscCall(PetscMalloc2(nloc, &d_nnz, nloc, &o_nnz));
7770   if (isseqaij) {
7771     a = Gmat;
7772     b = NULL;
7773   } else {
7774     Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
7775     a             = d->A;
7776     b             = d->B;
7777     garray        = d->garray;
7778   }
7779   /* Determine upper bound on non-zeros needed in new filtered matrix */
7780   for (PetscInt row = 0; row < nloc; row++) {
7781     PetscCall(MatGetRow(a, row, &ncols, NULL, NULL));
7782     d_nnz[row] = ncols;
7783     if (ncols > maxcols) maxcols = ncols;
7784     PetscCall(MatRestoreRow(a, row, &ncols, NULL, NULL));
7785   }
7786   if (b) {
7787     for (PetscInt row = 0; row < nloc; row++) {
7788       PetscCall(MatGetRow(b, row, &ncols, NULL, NULL));
7789       o_nnz[row] = ncols;
7790       if (ncols > maxcols) maxcols = ncols;
7791       PetscCall(MatRestoreRow(b, row, &ncols, NULL, NULL));
7792     }
7793   }
7794   PetscCall(MatSetSizes(tGmat, nloc, nloc, MM, MM));
7795   PetscCall(MatSetBlockSizes(tGmat, 1, 1));
7796   PetscCall(MatSeqAIJSetPreallocation(tGmat, 0, d_nnz));
7797   PetscCall(MatMPIAIJSetPreallocation(tGmat, 0, d_nnz, 0, o_nnz));
7798   PetscCall(MatSetOption(tGmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
7799   PetscCall(PetscFree2(d_nnz, o_nnz));
7800   //
7801   PetscCall(PetscMalloc2(maxcols, &AA, maxcols, &AJ));
7802   nnz0 = nnz1 = 0;
7803   for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7804     for (PetscInt row = 0, grow = Istart, ncol_row, jj; row < nloc; row++, grow++) {
7805       PetscCall(MatGetRow(c, row, &ncols, &idx, &vals));
7806       for (ncol_row = jj = 0; jj < ncols; jj++, nnz0++) {
7807         PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
7808         if (PetscRealPart(sv) > vfilter) {
7809           nnz1++;
7810           PetscInt cid = idx[jj] + Istart; //diag
7811           if (c != a) cid = garray[idx[jj]];
7812           AA[ncol_row] = vals[jj];
7813           AJ[ncol_row] = cid;
7814           ncol_row++;
7815         }
7816       }
7817       PetscCall(MatRestoreRow(c, row, &ncols, &idx, &vals));
7818       PetscCall(MatSetValues(tGmat, 1, &grow, ncol_row, AJ, AA, INSERT_VALUES));
7819     }
7820   }
7821   PetscCall(PetscFree2(AA, AJ));
7822   PetscCall(MatAssemblyBegin(tGmat, MAT_FINAL_ASSEMBLY));
7823   PetscCall(MatAssemblyEnd(tGmat, MAT_FINAL_ASSEMBLY));
7824   PetscCall(MatPropagateSymmetryOptions(Gmat, tGmat)); /* Normal Mat options are not relevant ? */
7825 
7826   PetscCall(PetscInfo(tGmat, "\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", (!nnz0) ? 1. : 100. * (double)nnz1 / (double)nnz0, (double)vfilter, (!nloc) ? 1. : (double)nnz0 / (double)nloc, MM, (int)maxcols));
7827 
7828   *filteredG = tGmat;
7829   PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view"));
7830   PetscFunctionReturn(PETSC_SUCCESS);
7831 }
7832 
7833 /*
7834  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7835 
7836  Input Parameter:
7837  . Amat - matrix
7838  - symmetrize - make the result symmetric
7839  + scale - scale with diagonal
7840 
7841  Output Parameter:
7842  . a_Gmat - output scalar graph >= 0
7843 
7844 */
7845 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, Mat *a_Gmat)
7846 {
7847   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7848   MPI_Comm  comm;
7849   Mat       Gmat;
7850   PetscBool ismpiaij, isseqaij;
7851   Mat       a, b, c;
7852   MatType   jtype;
7853 
7854   PetscFunctionBegin;
7855   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7856   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7857   PetscCall(MatGetSize(Amat, &MM, &NN));
7858   PetscCall(MatGetBlockSize(Amat, &bs));
7859   nloc = (Iend - Istart) / bs;
7860 
7861   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7862   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7863   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7864 
7865   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7866   /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
7867      implementation */
7868   if (bs > 1) {
7869     PetscCall(MatGetType(Amat, &jtype));
7870     PetscCall(MatCreate(comm, &Gmat));
7871     PetscCall(MatSetType(Gmat, jtype));
7872     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7873     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7874     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7875       PetscInt  *d_nnz, *o_nnz;
7876       MatScalar *aa, val, *AA;
7877       PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
7878       if (isseqaij) {
7879         a = Amat;
7880         b = NULL;
7881       } else {
7882         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7883         a             = d->A;
7884         b             = d->B;
7885       }
7886       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7887       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7888       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7889         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
7890         const PetscInt *cols1, *cols2;
7891         for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
7892           PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
7893           nnz[brow / bs] = nc2 / bs;
7894           if (nc2 % bs) ok = 0;
7895           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
7896           for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
7897             PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
7898             if (nc1 != nc2) ok = 0;
7899             else {
7900               for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
7901                 if (cols1[jj] != cols2[jj]) ok = 0;
7902                 if (cols1[jj] % bs != jj % bs) ok = 0;
7903               }
7904             }
7905             PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
7906           }
7907           PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
7908           if (!ok) {
7909             PetscCall(PetscFree2(d_nnz, o_nnz));
7910             PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
7911             goto old_bs;
7912           }
7913         }
7914       }
7915       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7916       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7917       PetscCall(PetscFree2(d_nnz, o_nnz));
7918       PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
7919       // diag
7920       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
7921         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
7922         ai               = aseq->i;
7923         n                = ai[brow + 1] - ai[brow];
7924         aj               = aseq->j + ai[brow];
7925         for (int k = 0; k < n; k += bs) {        // block columns
7926           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
7927           val        = 0;
7928           for (int ii = 0; ii < bs; ii++) { // rows in block
7929             aa = aseq->a + ai[brow + ii] + k;
7930             for (int jj = 0; jj < bs; jj++) {         // columns in block
7931               val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7932             }
7933           }
7934           PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7935           AA[k / bs] = val;
7936         }
7937         grow = Istart / bs + brow / bs;
7938         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES));
7939       }
7940       // off-diag
7941       if (ismpiaij) {
7942         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
7943         const PetscScalar *vals;
7944         const PetscInt    *cols, *garray = aij->garray;
7945         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
7946         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
7947           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
7948           for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
7949             PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
7950             AA[k / bs] = 0;
7951             AJ[cidx]   = garray[cols[k]] / bs;
7952           }
7953           nc = ncols / bs;
7954           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
7955           for (int ii = 0; ii < bs; ii++) { // rows in block
7956             PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7957             for (int k = 0; k < ncols; k += bs) {
7958               for (int jj = 0; jj < bs; jj++) { // cols in block
7959                 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7960                 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7961               }
7962             }
7963             PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7964           }
7965           grow = Istart / bs + brow / bs;
7966           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES));
7967         }
7968       }
7969       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7970       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7971       PetscCall(PetscFree2(AA, AJ));
7972     } else {
7973       const PetscScalar *vals;
7974       const PetscInt    *idx;
7975       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
7976     old_bs:
7977       /*
7978        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7979        */
7980       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
7981       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7982       if (isseqaij) {
7983         PetscInt max_d_nnz;
7984         /*
7985          Determine exact preallocation count for (sequential) scalar matrix
7986          */
7987         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
7988         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7989         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7990         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7991         PetscCall(PetscFree3(w0, w1, w2));
7992       } else if (ismpiaij) {
7993         Mat             Daij, Oaij;
7994         const PetscInt *garray;
7995         PetscInt        max_d_nnz;
7996         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
7997         /*
7998          Determine exact preallocation count for diagonal block portion of scalar matrix
7999          */
8000         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
8001         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
8002         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
8003         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
8004         PetscCall(PetscFree3(w0, w1, w2));
8005         /*
8006          Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
8007          */
8008         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
8009           o_nnz[jj] = 0;
8010           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
8011             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
8012             o_nnz[jj] += ncols;
8013             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
8014           }
8015           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
8016         }
8017       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
8018       /* get scalar copy (norms) of matrix */
8019       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
8020       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
8021       PetscCall(PetscFree2(d_nnz, o_nnz));
8022       for (Ii = Istart; Ii < Iend; Ii++) {
8023         PetscInt dest_row = Ii / bs;
8024         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
8025         for (jj = 0; jj < ncols; jj++) {
8026           PetscInt    dest_col = idx[jj] / bs;
8027           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
8028           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
8029         }
8030         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
8031       }
8032       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
8033       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
8034     }
8035   } else {
8036     if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
8037     else {
8038       Gmat = Amat;
8039       PetscCall(PetscObjectReference((PetscObject)Gmat));
8040     }
8041     if (isseqaij) {
8042       a = Gmat;
8043       b = NULL;
8044     } else {
8045       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
8046       a             = d->A;
8047       b             = d->B;
8048     }
8049     if (filter >= 0 || scale) {
8050       /* take absolute value of each entry */
8051       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
8052         MatInfo      info;
8053         PetscScalar *avals;
8054         PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
8055         PetscCall(MatSeqAIJGetArray(c, &avals));
8056         for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
8057         PetscCall(MatSeqAIJRestoreArray(c, &avals));
8058       }
8059     }
8060   }
8061   if (symmetrize) {
8062     PetscBool isset, issym;
8063     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
8064     if (!isset || !issym) {
8065       Mat matTrans;
8066       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
8067       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
8068       PetscCall(MatDestroy(&matTrans));
8069     }
8070     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
8071   } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
8072   if (scale) {
8073     /* scale c for all diagonal values = 1 or -1 */
8074     Vec diag;
8075     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
8076     PetscCall(MatGetDiagonal(Gmat, diag));
8077     PetscCall(VecReciprocal(diag));
8078     PetscCall(VecSqrtAbs(diag));
8079     PetscCall(MatDiagonalScale(Gmat, diag, diag));
8080     PetscCall(VecDestroy(&diag));
8081   }
8082   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
8083 
8084   if (filter >= 0) {
8085     Mat Fmat = NULL; /* some silly compiler needs this */
8086 
8087     PetscCall(MatFilter_AIJ(Gmat, filter, &Fmat));
8088     PetscCall(MatDestroy(&Gmat));
8089     Gmat = Fmat;
8090   }
8091   *a_Gmat = Gmat;
8092   PetscFunctionReturn(PETSC_SUCCESS);
8093 }
8094 
8095 /*
8096     Special version for direct calls from Fortran
8097 */
8098 #include <petsc/private/fortranimpl.h>
8099 
/*
   The routine below returns void and reports failures through its _ierr argument, so the two
   error-handling macros are redefined to store the error code in *_ierr and return without a value.
*/
/* Identical to PetscCallVoid, except that the value returned by PetscError() is assigned to *_ierr */
#undef PetscCall
#define PetscCall(...) \
  do { \
    PetscErrorCode fstub_ierr_mpiaij = __VA_ARGS__; \
    if (PetscUnlikely(fstub_ierr_mpiaij)) { \
      *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, fstub_ierr_mpiaij, PETSC_ERROR_REPEAT, " "); \
      return; \
    } \
  } while (0)

/* Raises a new error: the value returned by PetscError() is assigned to *_ierr and the function returns */
#undef SETERRQ
#define SETERRQ(comm, ierr, ...) \
  do { \
    *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
    return; \
  } while (0)
8118 
8119 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8120   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8121 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8122   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8123 #else
8124 #endif
8125 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
8126 {
8127   Mat         mat = *mmat;
8128   PetscInt    m = *mm, n = *mn;
8129   InsertMode  addv = *maddv;
8130   Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
8131   PetscScalar value;
8132 
8133   MatCheckPreallocated(mat, 1);
8134   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
8135   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
8136   {
8137     PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
8138     PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
8139     PetscBool roworiented = aij->roworiented;
8140 
8141     /* Some Variables required in the macro */
8142     Mat         A     = aij->A;
8143     Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
8144     PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
8145     MatScalar  *aa;
8146     PetscBool   ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
8147     Mat         B                 = aij->B;
8148     Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
8149     PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
8150     MatScalar  *ba;
8151     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
8152      * cannot use "#if defined" inside a macro. */
8153     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
8154 
8155     PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
8156     PetscInt   nonew = a->nonew;
8157     MatScalar *ap1, *ap2;
8158 
8159     PetscFunctionBegin;
8160     PetscCall(MatSeqAIJGetArray(A, &aa));
8161     PetscCall(MatSeqAIJGetArray(B, &ba));
8162     for (i = 0; i < m; i++) {
8163       if (im[i] < 0) continue;
8164       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
8165       if (im[i] >= rstart && im[i] < rend) {
8166         row      = im[i] - rstart;
8167         lastcol1 = -1;
8168         rp1      = aj + ai[row];
8169         ap1      = aa + ai[row];
8170         rmax1    = aimax[row];
8171         nrow1    = ailen[row];
8172         low1     = 0;
8173         high1    = nrow1;
8174         lastcol2 = -1;
8175         rp2      = bj + bi[row];
8176         ap2      = ba + bi[row];
8177         rmax2    = bimax[row];
8178         nrow2    = bilen[row];
8179         low2     = 0;
8180         high2    = nrow2;
8181 
8182         for (j = 0; j < n; j++) {
8183           if (roworiented) value = v[i * n + j];
8184           else value = v[i + j * m];
8185           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
8186           if (in[j] >= cstart && in[j] < cend) {
8187             col = in[j] - cstart;
8188             MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
8189           } else if (in[j] < 0) continue;
8190           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
8191             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
8192           } else {
8193             if (mat->was_assembled) {
8194               if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
8195 #if defined(PETSC_USE_CTABLE)
8196               PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
8197               col--;
8198 #else
8199               col = aij->colmap[in[j]] - 1;
8200 #endif
8201               if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
8202                 PetscCall(MatDisAssemble_MPIAIJ(mat));
8203                 col = in[j];
8204                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
8205                 B        = aij->B;
8206                 b        = (Mat_SeqAIJ *)B->data;
8207                 bimax    = b->imax;
8208                 bi       = b->i;
8209                 bilen    = b->ilen;
8210                 bj       = b->j;
8211                 rp2      = bj + bi[row];
8212                 ap2      = ba + bi[row];
8213                 rmax2    = bimax[row];
8214                 nrow2    = bilen[row];
8215                 low2     = 0;
8216                 high2    = nrow2;
8217                 bm       = aij->B->rmap->n;
8218                 ba       = b->a;
8219                 inserted = PETSC_FALSE;
8220               }
8221             } else col = in[j];
8222             MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
8223           }
8224         }
8225       } else if (!aij->donotstash) {
8226         if (roworiented) {
8227           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8228         } else {
8229           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8230         }
8231       }
8232     }
8233     PetscCall(MatSeqAIJRestoreArray(A, &aa));
8234     PetscCall(MatSeqAIJRestoreArray(B, &ba));
8235   }
8236   PetscFunctionReturnVoid();
8237 }
8238 
8239 /* Undefining these here since they were redefined from their original definition above! No
8240  * other PETSc functions should be defined past this point, as it is impossible to recover the
8241  * original definitions */
8242 #undef PetscCall
8243 #undef SETERRQ
8244