xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision ea9ee2c1d69c9e3cf6d2b3c8a205b9880d3dba39)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
10 {
11   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
12 
13   PetscFunctionBegin;
14 #if defined(PETSC_USE_LOG)
15   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
16 #endif
17   PetscCall(MatStashDestroy_Private(&mat->stash));
18   PetscCall(VecDestroy(&aij->diag));
19   PetscCall(MatDestroy(&aij->A));
20   PetscCall(MatDestroy(&aij->B));
21 #if defined(PETSC_USE_CTABLE)
22   PetscCall(PetscHMapIDestroy(&aij->colmap));
23 #else
24   PetscCall(PetscFree(aij->colmap));
25 #endif
26   PetscCall(PetscFree(aij->garray));
27   PetscCall(VecDestroy(&aij->lvec));
28   PetscCall(VecScatterDestroy(&aij->Mvctx));
29   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
30   PetscCall(PetscFree(aij->ld));
31 
32   PetscCall(PetscFree(mat->data));
33 
34   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
35   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
36 
37   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
38   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
39   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
40   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
41   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
42   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
43   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
44   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
45   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
46   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
47 #if defined(PETSC_HAVE_CUDA)
48   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
49 #endif
50 #if defined(PETSC_HAVE_HIP)
51   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
52 #endif
53 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
54   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
55 #endif
56   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
57 #if defined(PETSC_HAVE_ELEMENTAL)
58   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
59 #endif
60 #if defined(PETSC_HAVE_SCALAPACK)
61   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
62 #endif
63 #if defined(PETSC_HAVE_HYPRE)
64   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
65   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
66 #endif
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
69   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
71   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
73 #if defined(PETSC_HAVE_MKL_SPARSE)
74   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
75 #endif
76   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
77   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
78   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
79   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
80   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
81   PetscFunctionReturn(PETSC_SUCCESS);
82 }
83 
84 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and  MatAssemblyEnd_MPI_Hash() */
85 #define TYPE AIJ
86 #define TYPE_AIJ
87 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
88 #undef TYPE
89 #undef TYPE_AIJ
90 
91 PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
92 {
93   Mat B;
94 
95   PetscFunctionBegin;
96   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
97   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
98   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
99   PetscCall(MatDestroy(&B));
100   PetscFunctionReturn(PETSC_SUCCESS);
101 }
102 
103 PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
104 {
105   Mat B;
106 
107   PetscFunctionBegin;
108   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
109   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
110   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
111   PetscFunctionReturn(PETSC_SUCCESS);
112 }
113 
114 /*MC
115    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
116 
   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
118    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
119   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
120   for communicators controlling multiple processes.  It is recommended that you call both of
121   the above preallocation routines for simplicity.
122 
123    Options Database Key:
124 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
125 
  Level: beginner

  Developer Note:
  Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`, and the type also automatically switches over to use inodes when
  enough exist.
131 
132 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
133 M*/
134 
135 /*MC
136    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
137 
138    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
139    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
140    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
141   for communicators controlling multiple processes.  It is recommended that you call both of
142   the above preallocation routines for simplicity.
143 
144    Options Database Key:
145 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`
146 
147   Level: beginner
148 
149 .seealso: [](chapter_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
150 M*/
151 
152 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
153 {
154   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
155 
156   PetscFunctionBegin;
157 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
158   A->boundtocpu = flg;
159 #endif
160   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
161   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
162 
163   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
164    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
165    * to differ from the parent matrix. */
166   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
167   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
168 
169   PetscFunctionReturn(PETSC_SUCCESS);
170 }
171 
172 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
173 {
174   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
175 
176   PetscFunctionBegin;
177   if (mat->A) {
178     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
179     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
180   }
181   PetscFunctionReturn(PETSC_SUCCESS);
182 }
183 
/*
  Builds an index set containing the global numbers of the locally owned rows of
  M that have at least one (numerically) nonzero entry.  If no row anywhere in
  the matrix is zero, *keptrows is returned as NULL on every process.
*/
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  /* Pass 1: count the locally zero rows -- either structurally empty in both
     blocks or with all stored values equal to 0.0 */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) { /* structurally empty row */
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1; /* found a nonzero: row is kept, not counted */
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++; /* stored entries exist but all are numerically zero */
  ok1:;
  }
  /* Global number of zero rows across all processes */
  PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) { /* no zero rows anywhere: leave *keptrows NULL everywhere */
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  /* Pass 2: record the global indices of the m - cnt rows that are kept */
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(PETSC_SUCCESS);
}
252 
253 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
254 {
255   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
256   PetscBool   cong;
257 
258   PetscFunctionBegin;
259   PetscCall(MatHasCongruentLayouts(Y, &cong));
260   if (Y->assembled && cong) {
261     PetscCall(MatDiagonalSet(aij->A, D, is));
262   } else {
263     PetscCall(MatDiagonalSet_Default(Y, D, is));
264   }
265   PetscFunctionReturn(PETSC_SUCCESS);
266 }
267 
268 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
269 {
270   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
271   PetscInt    i, rstart, nrows, *rows;
272 
273   PetscFunctionBegin;
274   *zrows = NULL;
275   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
276   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
277   for (i = 0; i < nrows; i++) rows[i] += rstart;
278   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
279   PetscFunctionReturn(PETSC_SUCCESS);
280 }
281 
/*
  Computes a per-column reduction (1/2/infinity norm, or sum/mean of real or
  imaginary parts) over the entire distributed matrix.  `reductions` must have
  length equal to the global number of columns; on return it holds the same
  values on every process.
*/
PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work)); /* one zero-initialized slot per GLOBAL column */
  /* NOTE(review): these get/restore pairs appear to exist only to force an
     up-to-date host copy of the value arrays before a_aij->a / b_aij->a are
     read directly below -- confirm */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  /* Diagonal-block columns are local (shift by cmap->rstart); off-diagonal
     columns are translated to global numbering through garray */
  if (type == NORM_2) {
    /* |a*a| == |a|^2, so this accumulates squared magnitudes */
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  /* Combine the partial per-column results: MAX for the infinity norm, SUM otherwise */
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    /* convert summed squared magnitudes into 2-norms */
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    /* divide by the global number of rows to get the mean */
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
327 
328 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
329 {
330   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
331   IS              sis, gis;
332   const PetscInt *isis, *igis;
333   PetscInt        n, *iis, nsis, ngis, rstart, i;
334 
335   PetscFunctionBegin;
336   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
337   PetscCall(MatFindNonzeroRows(a->B, &gis));
338   PetscCall(ISGetSize(gis, &ngis));
339   PetscCall(ISGetSize(sis, &nsis));
340   PetscCall(ISGetIndices(sis, &isis));
341   PetscCall(ISGetIndices(gis, &igis));
342 
343   PetscCall(PetscMalloc1(ngis + nsis, &iis));
344   PetscCall(PetscArraycpy(iis, igis, ngis));
345   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
346   n = ngis + nsis;
347   PetscCall(PetscSortRemoveDupsInt(&n, iis));
348   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
349   for (i = 0; i < n; i++) iis[i] += rstart;
350   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
351 
352   PetscCall(ISRestoreIndices(sis, &isis));
353   PetscCall(ISRestoreIndices(gis, &igis));
354   PetscCall(ISDestroy(&sis));
355   PetscCall(ISDestroy(&gis));
356   PetscFunctionReturn(PETSC_SUCCESS);
357 }
358 
/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it, it is not scalable (each process
has an order-N integer array, but access is fast).
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n   = aij->B->cmap->n, i; /* number of (compressed) off-diagonal columns */

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* keys and values are stored shifted by +1 so that the "missing" value 0 is
     distinguishable from a valid local index of 0 */
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  /* dense array over ALL global columns (zeroed): 0 means "column not present
     in B", otherwise local index + 1 */
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
382 
/* Insert or add `value` at local (row, col) of the diagonal block A.
   Bisects the row's column list down to a window of 5 entries, then scans
   linearly.  Jumps to a_noinsert when the entry already exists, is an
   ignorable off-diagonal zero, or new nonzeros are silently skipped
   (nonew == 1); errors when nonew == -1; otherwise reallocates the row and
   shifts later entries up to make room.  (orow, ocol) are the global indices,
   used only in the error message. */
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    /* zero values off the diagonal may be dropped when ignorezeroentries is set */ \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  }
428 
/* Off-diagonal-block (B) counterpart of MatSetValues_SeqAIJ_A_Private().
   Same bisect-then-scan insertion; unlike the A variant, the ignorable-zero
   test has no row != col exemption, since off-diagonal-block entries are never
   on the matrix diagonal. */
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  }
473 
/*
  Replaces the stored values of (global) row `row` with v[], where v lists the
  row's stored entries in global column order: the off-diagonal entries left of
  the diagonal block, then the diagonal-block entries, then the remaining
  off-diagonal entries.  Sparsity pattern is unchanged; only values are copied.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray                         = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag; /* convert to local row index */
  /* l = number of off-diagonal entries with global column left of the diagonal
     block (valid because rstart == cstart for square A) */
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
511 
/*
  MatSetValues for MPIAIJ: values in locally owned rows are inserted directly
  into the diagonal (A) or off-diagonal (B) sequential block via the
  MatSetValues_SeqAIJ_{A,B}_Private() macros; values in rows owned by other
  processes are buffered in mat->stash and communicated during assembly.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A     = aij->A;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B                 = aij->B;
  Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  /* search state shared with the insertion macros; suffix 1 = diagonal block, 2 = off-diagonal block */
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative row indices are ignored by convention */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) { /* locally owned row: insert directly */
      row      = im[i] - rstart;
      /* initialize the macro search windows for this row in both blocks */
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) { /* column falls in the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) { /* negative column indices are ignored */
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            /* after assembly B uses compressed local column indices: translate via colmap */
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              /* a genuinely new nonzero in B, but insertions are disabled */
              if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* before first assembly B uses global column indices */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else { /* off-process row: buffer in the stash for assembly-time communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}
620 
621 /*
622     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
623     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
624     No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
625 */
626 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
627 {
628   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
629   Mat         A      = aij->A; /* diagonal part of the matrix */
630   Mat         B      = aij->B; /* offdiagonal part of the matrix */
631   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
632   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
633   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
634   PetscInt   *ailen = a->ilen, *aj = a->j;
635   PetscInt   *bilen = b->ilen, *bj = b->j;
636   PetscInt    am          = aij->A->rmap->n, j;
637   PetscInt    diag_so_far = 0, dnz;
638   PetscInt    offd_so_far = 0, onz;
639 
640   PetscFunctionBegin;
641   /* Iterate over all rows of the matrix */
642   for (j = 0; j < am; j++) {
643     dnz = onz = 0;
644     /*  Iterate over all non-zero columns of the current row */
645     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
646       /* If column is in the diagonal */
647       if (mat_j[col] >= cstart && mat_j[col] < cend) {
648         aj[diag_so_far++] = mat_j[col] - cstart;
649         dnz++;
650       } else { /* off-diagonal entries */
651         bj[offd_so_far++] = mat_j[col];
652         onz++;
653       }
654     }
655     ailen[j] = dnz;
656     bilen[j] = onz;
657   }
658   PetscFunctionReturn(PETSC_SUCCESS);
659 }
660 
661 /*
662     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
663     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
664     No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
665     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
666     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
667 */
668 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
669 {
670   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
671   Mat          A    = aij->A; /* diagonal part of the matrix */
672   Mat          B    = aij->B; /* offdiagonal part of the matrix */
673   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
674   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
675   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
676   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
677   PetscInt    *ailen = a->ilen, *aj = a->j;
678   PetscInt    *bilen = b->ilen, *bj = b->j;
679   PetscInt     am          = aij->A->rmap->n, j;
680   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
681   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
682   PetscScalar *aa = a->a, *ba = b->a;
683 
684   PetscFunctionBegin;
685   /* Iterate over all rows of the matrix */
686   for (j = 0; j < am; j++) {
687     dnz_row = onz_row = 0;
688     rowstart_offd     = full_offd_i[j];
689     rowstart_diag     = full_diag_i[j];
690     /*  Iterate over all non-zero columns of the current row */
691     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
692       /* If column is in the diagonal */
693       if (mat_j[col] >= cstart && mat_j[col] < cend) {
694         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
695         aa[rowstart_diag + dnz_row] = mat_a[col];
696         dnz_row++;
697       } else { /* off-diagonal entries */
698         bj[rowstart_offd + onz_row] = mat_j[col];
699         ba[rowstart_offd + onz_row] = mat_a[col];
700         onz_row++;
701       }
702     }
703     ailen[j] = dnz_row;
704     bilen[j] = onz_row;
705   }
706   PetscFunctionReturn(PETSC_SUCCESS);
707 }
708 
/* Retrieves entries v[i*n+j] = mat(idxm[i], idxn[j]); only rows owned by this rank are supported */
PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row: skipped by PETSc convention */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart; /* local row index within the owned block */
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column: skipped by PETSc convention */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* column lies in the diagonal block A: shift to its local column numbering */
          col = idxn[j] - cstart;
          PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          /* column lies in the off-diagonal block B: translate the global column
             to B's compact numbering via the (lazily created) colmap */
          if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
          col--; /* colmap stores index+1 so that 0 can mean "not present" */
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* column not in B's nonzero structure -> the entry is an unstored zero */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
          else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
743 
744 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
745 {
746   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
747   PetscInt    nstash, reallocs;
748 
749   PetscFunctionBegin;
750   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
751 
752   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
753   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
754   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
755   PetscFunctionReturn(PETSC_SUCCESS);
756 }
757 
/* Finish assembly: receive and insert stashed off-process entries, assemble the two
   sequential blocks, handle (collective) disassembly, and refresh cached data. */
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  /* drain incoming stash messages and insert their entries locally */
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break; /* no more messages */

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    /* MPI_LAND over was_assembled: becomes false as soon as any rank disassembled */
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  /* first final assembly: build the scatter context for matrix-vector products */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  /* drop row-access work arrays; they will be reallocated on demand */
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  /* cached diagonal is stale after assembly */
  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    /* sum of per-block states over all ranks gives a global nonzero-structure fingerprint */
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
836 
837 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
838 {
839   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
840 
841   PetscFunctionBegin;
842   PetscCall(MatZeroEntries(l->A));
843   PetscCall(MatZeroEntries(l->B));
844   PetscFunctionReturn(PETSC_SUCCESS);
845 }
846 
/* Zeros the given (global) rows, optionally placing diag on the diagonal and fixing
   b so that x remains a solution for the zeroed rows. Collective on A. */
PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)A->data;
  PetscObjectState sA, sB;
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    /* after zeroing, row r of the system reads diag * x[r] = b[r] */
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* remember the blocks' nonzero states so we can detect structural changes below */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* diagonal entries live in the diagonal block A; B rows are simply zeroed */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    nnwA, nnwB;
    PetscBool   nnzA, nnzB;

    /* save the blocks' nonew settings; nnzA/nnzB hold the keepnonzeropattern flags */
    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0; /* temporarily allow new nonzero locations */
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0; /* temporarily allow new nonzero locations */
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    /* insert the diagonal entries one by one (they may be new nonzero locations) */
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue; /* no diagonal entry exists for rows beyond the column range */
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    /* restore the original nonew settings */
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++; /* bump global state if any rank changed structure */
  PetscFunctionReturn(PETSC_SUCCESS);
}
920 
/* Zeros the given (global) rows AND the corresponding columns, optionally placing diag
   on the diagonal and updating b for a known solution x. Collective on A. */
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n; /* NOTE: reused below as a per-row nonzero count */
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1; /* -1 marks "row not requested" */
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  /* NOTE(review): rows is cast away from const here; the reduce only reads it, but confirm against PetscSFReduce semantics */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers: lrows[] switches from per-row flags to a compact list */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off diagonal part of matrix */
  /* build a ghosted mask vector that is nonzero exactly at the zeroed rows/columns */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    /* gather the ghost values of x needed to correct b below */
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex; /* maps compressed row slots back to actual local rows */
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i]; /* n now holds the nonzero count of this row */
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) { /* this column is being zeroed */
          if (b) bb[*ridx] -= *aa * xx[*aj]; /* move the known contribution to the right-hand side */
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1038 
1039 PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1040 {
1041   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1042   PetscInt    nt;
1043   VecScatter  Mvctx = a->Mvctx;
1044 
1045   PetscFunctionBegin;
1046   PetscCall(VecGetLocalSize(xx, &nt));
1047   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
1048   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1049   PetscUseTypeMethod(a->A, mult, xx, yy);
1050   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1051   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1052   PetscFunctionReturn(PETSC_SUCCESS);
1053 }
1054 
1055 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1056 {
1057   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1058 
1059   PetscFunctionBegin;
1060   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1061   PetscFunctionReturn(PETSC_SUCCESS);
1062 }
1063 
1064 PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1065 {
1066   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1067   VecScatter  Mvctx = a->Mvctx;
1068 
1069   PetscFunctionBegin;
1070   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1071   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1072   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1073   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1074   PetscFunctionReturn(PETSC_SUCCESS);
1075 }
1076 
1077 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1078 {
1079   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1080 
1081   PetscFunctionBegin;
1082   /* do nondiagonal part */
1083   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1084   /* do local part */
1085   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1086   /* add partial results together */
1087   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1088   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1089   PetscFunctionReturn(PETSC_SUCCESS);
1090 }
1091 
/* Tests whether Bmat equals the transpose of Amat (within tol), setting *f collectively */
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
  Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme;
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  /* all ranks must agree before the expensive test is attempted */
  PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); /* uniprocessor: the diagonal block is the whole matrix */

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  /* notme = all global indices outside this rank's ownership range
     NOTE(review): the array is sized with N but the second loop runs to M;
     this mixes row and column sizes — appears to assume M == N, confirm upstream */
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  /* compare A(Me, Notme) against B(Notme, Me): these hold the off-diagonal entries */
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1131 
1132 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f)
1133 {
1134   PetscFunctionBegin;
1135   PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f));
1136   PetscFunctionReturn(PETSC_SUCCESS);
1137 }
1138 
1139 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1140 {
1141   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1142 
1143   PetscFunctionBegin;
1144   /* do nondiagonal part */
1145   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1146   /* do local part */
1147   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1148   /* add partial results together */
1149   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1150   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1151   PetscFunctionReturn(PETSC_SUCCESS);
1152 }
1153 
1154 /*
1155   This only works correctly for square matrices where the subblock A->A is the
1156    diagonal block
1157 */
1158 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1159 {
1160   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1161 
1162   PetscFunctionBegin;
1163   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1164   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1165   PetscCall(MatGetDiagonal(a->A, v));
1166   PetscFunctionReturn(PETSC_SUCCESS);
1167 }
1168 
1169 PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1170 {
1171   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1172 
1173   PetscFunctionBegin;
1174   PetscCall(MatScale(a->A, aa));
1175   PetscCall(MatScale(a->B, aa));
1176   PetscFunctionReturn(PETSC_SUCCESS);
1177 }
1178 
1179 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1180 {
1181   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1182   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1183   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1184   const PetscInt    *garray = aij->garray;
1185   const PetscScalar *aa, *ba;
1186   PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
1187   PetscInt64         nz, hnz;
1188   PetscInt          *rowlens;
1189   PetscInt          *colidxs;
1190   PetscScalar       *matvals;
1191   PetscMPIInt        rank;
1192 
1193   PetscFunctionBegin;
1194   PetscCall(PetscViewerSetUp(viewer));
1195 
1196   M  = mat->rmap->N;
1197   N  = mat->cmap->N;
1198   m  = mat->rmap->n;
1199   rs = mat->rmap->rstart;
1200   cs = mat->cmap->rstart;
1201   nz = A->nz + B->nz;
1202 
1203   /* write matrix header */
1204   header[0] = MAT_FILE_CLASSID;
1205   header[1] = M;
1206   header[2] = N;
1207   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1208   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1209   if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
1210   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1211 
1212   /* fill in and store row lengths  */
1213   PetscCall(PetscMalloc1(m, &rowlens));
1214   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1215   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1216   PetscCall(PetscFree(rowlens));
1217 
1218   /* fill in and store column indices */
1219   PetscCall(PetscMalloc1(nz, &colidxs));
1220   for (cnt = 0, i = 0; i < m; i++) {
1221     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1222       if (garray[B->j[jb]] > cs) break;
1223       colidxs[cnt++] = garray[B->j[jb]];
1224     }
1225     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1226     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1227   }
1228   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1229   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1230   PetscCall(PetscFree(colidxs));
1231 
1232   /* fill in and store nonzero values */
1233   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1234   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1235   PetscCall(PetscMalloc1(nz, &matvals));
1236   for (cnt = 0, i = 0; i < m; i++) {
1237     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1238       if (garray[B->j[jb]] > cs) break;
1239       matvals[cnt++] = ba[jb];
1240     }
1241     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1242     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1243   }
1244   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1245   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1246   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1247   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1248   PetscCall(PetscFree(matvals));
1249 
1250   /* write block size option to the viewer's .info file */
1251   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1252   PetscFunctionReturn(PETSC_SUCCESS);
1253 }
1254 
1255 #include <petscdraw.h>
/* Handles viewing of an MPIAIJ matrix for ASCII, draw, binary and socket viewers.
   Info-style ASCII formats and binary output are handled in place; everything else
   falls through to gathering the whole matrix on rank 0 and viewing it there. */
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt       rank = aij->rank, size = aij->size;
  PetscBool         isdraw, iascii, isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max of the per-rank nonzero counts */
      PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < (PetscInt)size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank storage/inode statistics, printed in rank order */
      MatInfo   info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    /* other ASCII formats fall through to the gather-on-rank-0 path below */
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (iascii && size == 1) {
    /* NOTE(review): this branch is unreachable — iascii was already handled by the first branch above */
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow, iscol;

    /* rank 0 requests all rows/columns; everyone else requests none */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
    /*  The commented code uses MatCreateSubMatrices instead */
    /*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
    if (rank == 0) {
       PetscCall(PetscObjectReference((PetscObject)AA[0]));
       A    = AA[0];
       Av   = AA[0];
    }
    PetscCall(MatDestroySubMatrices(1,&AA));
*/
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqAIJ(Av, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1381 
1382 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1383 {
1384   PetscBool iascii, isdraw, issocket, isbinary;
1385 
1386   PetscFunctionBegin;
1387   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1388   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1389   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1390   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1391   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1392   PetscFunctionReturn(PETSC_SUCCESS);
1393 }
1394 
/*
   MatSOR_MPIAIJ - SOR/Gauss-Seidel relaxation for MPIAIJ matrices.

   Only the "local" sweep variants (and Eisenstat) are supported: each outer
   iteration gathers the off-process part of xx, folds it into the right-hand
   side (bb1 = bb - B*x_ghost), and then runs the sequential SOR kernel on the
   local diagonal block A. A true parallel SOR is not implemented and errors out.
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec         bb1 = NULL; /* work vector holding bb minus the off-diagonal contribution */
  PetscBool   hasop;

  PetscFunctionBegin;
  /* SOR_APPLY_UPPER acts only on the local diagonal block; forward directly */
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* The work vector is needed whenever more than one outer iteration runs, the
     initial guess is nonzero (note: ~flag & SOR_ZERO_INITIAL_GUESS tests that the
     zero-initial-guess bit is NOT set), or Eisenstat's trick is requested */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    /* With a zero initial guess the ghost contribution vanishes, so the first
       sweep can use bb directly */
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      /* gather the off-process entries of xx needed by the off-diagonal block B */
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    /* backward sweep with zero initial guess on the diagonal block */
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    /* lazily cache the global diagonal, needed when no fast diagonal-block multiply exists */
    if (!mat->diag) {
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    /* bb1 = bb + ((omega-2)/omega) * D*xx */
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  /* propagate any zero/small-pivot error detected by the local kernel */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1491 
/*
   MatPermute_MPIAIJ - Produces B = P*A*Q for permutations described by the
   index sets rowp (rows) and colp (columns).

   Strategy: use star forests (PetscSF) to invert the row and column
   permutations so each process learns the destination index of every local
   row/column, preallocate the permuted matrix from the counted
   diagonal/off-diagonal entries, then insert the values with MatSetValues().
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL; /* NOTE(review): never assigned in this function; the ISDestroy at the end is currently dead code — verify */
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  /* work: scratch of size max(m,n); rdest/cdest: destination of each local row/column */
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go */
  /* gcols are the global column indices of the off-diagonal block; broadcast the
     already-computed column destinations to them */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* Count, per local row, how many permuted entries land in the diagonal (dnnz)
     vs off-diagonal (onnz) block of the destination row's owner */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* Ship the counts to the processes that own the destination rows */
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1597 
1598 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1599 {
1600   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1601 
1602   PetscFunctionBegin;
1603   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1604   if (ghosts) *ghosts = aij->garray;
1605   PetscFunctionReturn(PETSC_SUCCESS);
1606 }
1607 
1608 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1609 {
1610   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1611   Mat            A = mat->A, B = mat->B;
1612   PetscLogDouble isend[5], irecv[5];
1613 
1614   PetscFunctionBegin;
1615   info->block_size = 1.0;
1616   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1617 
1618   isend[0] = info->nz_used;
1619   isend[1] = info->nz_allocated;
1620   isend[2] = info->nz_unneeded;
1621   isend[3] = info->memory;
1622   isend[4] = info->mallocs;
1623 
1624   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1625 
1626   isend[0] += info->nz_used;
1627   isend[1] += info->nz_allocated;
1628   isend[2] += info->nz_unneeded;
1629   isend[3] += info->memory;
1630   isend[4] += info->mallocs;
1631   if (flag == MAT_LOCAL) {
1632     info->nz_used      = isend[0];
1633     info->nz_allocated = isend[1];
1634     info->nz_unneeded  = isend[2];
1635     info->memory       = isend[3];
1636     info->mallocs      = isend[4];
1637   } else if (flag == MAT_GLOBAL_MAX) {
1638     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1639 
1640     info->nz_used      = irecv[0];
1641     info->nz_allocated = irecv[1];
1642     info->nz_unneeded  = irecv[2];
1643     info->memory       = irecv[3];
1644     info->mallocs      = irecv[4];
1645   } else if (flag == MAT_GLOBAL_SUM) {
1646     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1647 
1648     info->nz_used      = irecv[0];
1649     info->nz_allocated = irecv[1];
1650     info->nz_unneeded  = irecv[2];
1651     info->memory       = irecv[3];
1652     info->mallocs      = irecv[4];
1653   }
1654   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1655   info->fill_ratio_needed = 0;
1656   info->factor_mallocs    = 0;
1657   PetscFunctionReturn(PETSC_SUCCESS);
1658 }
1659 
/*
   MatSetOption_MPIAIJ - Sets an option on an MPIAIJ matrix, forwarding it to
   the sequential diagonal (A) and off-diagonal (B) blocks where appropriate.
*/
PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* options simply forwarded to both sequential blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    /* recorded locally (affects MatSetValues ordering) and forwarded */
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* purely local flag: skip stashing of off-process entries during assembly */
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1713 
/*
   MatGetRow_MPIAIJ - Returns one locally owned row of the matrix, merging the
   entries of the diagonal block A (local column numbering offset by cstart)
   and the off-diagonal block B (compressed columns mapped to global indices
   via garray) into a single list sorted by global column.

   The merged row is copied into the persistent work arrays rowvalues and
   rowindices, which are sized once for the longest local row. Only one row may
   be "gotten" at a time (guarded by getrowactive).
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
  PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    max = 1, tmp;
    for (i = 0; i < matin->rmap->n; i++) {
      /* total row length = A-part length + B-part length */
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  /* request from A and B only the pieces (values and/or columns) the caller wants */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column precedes the diagonal block */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already computed in the value pass above */
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        /* diagonal-block columns are local, shift into global numbering */
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1797 
1798 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1799 {
1800   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1801 
1802   PetscFunctionBegin;
1803   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1804   aij->getrowactive = PETSC_FALSE;
1805   PetscFunctionReturn(PETSC_SUCCESS);
1806 }
1807 
/*
   MatNorm_MPIAIJ - Computes the Frobenius, 1- (max column sum), or
   infinity-norm (max row sum) of an MPIAIJ matrix by combining the raw CSR
   data of the diagonal (A) and off-diagonal (B) blocks and reducing across
   the communicator. The uniprocess case delegates to the sequential norm.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
  PetscInt         i, j, cstart = mat->cmap->rstart;
  PetscReal        sum = 0.0;
  const MatScalar *v, *amata, *bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    PetscCall(MatNorm(aij->A, type, norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both blocks, then reduce and take the square root */
      v = amata;
      for (i = 0; i < amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v = bmata;
      for (i = 0; i < bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      /* NOTE: uses two temporaries of global column size N on every process */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = aij->garray;
      PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
      *norm = 0.0;
      /* accumulate |a_ij| into the global column slot: A columns are local
         (offset by cstart), B columns are mapped through garray */
      v     = amata;
      jj    = amat->j;
      for (j = 0; j < amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);
        v++;
      }
      v  = bmata;
      jj = bmat->j;
      for (j = 0; j < bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v);
        v++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j = 0; j < aij->A->rmap->n; j++) {
        /* row sum = |A row j| + |B row j| */
        v   = amata + amat->i[j];
        sum = 0.0;
        for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        v = bmata + bmat->i[j];
        for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1885 
/*
   MatTranspose_MPIAIJ - Forms the explicit transpose of an MPIAIJ matrix.

   The diagonal block is transposed locally with the fast sequential kernel;
   entries of the off-diagonal block are inserted with MatSetValues() since
   their transposed locations live on other processes. Supports
   MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX (preallocated *matout), and in-place
   transposition (*matout == A, finished via MatHeaderMerge).
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
  Mat              B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  ma = A->rmap->n;
  na = A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    /* count nonzeros per column to preallocate the transpose */
    PetscInt            *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode         *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    /* B is created with A's column layout as rows and row layout as columns */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    B = *matout;
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ *)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part */
  /* translate B's compressed column indices to global, then insert each row i
     of B as column (rstart+i) of the transpose */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    ncol = bi[i + 1] - bi[i];
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    pbv += ncol;
    cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place: replace A's innards with B's, keeping the A handle valid */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1979 
/*
   MatDiagonalScale_MPIAIJ - Computes mat = diag(ll)*mat*diag(rr).

   The right-scaling of the off-diagonal block needs the ghost entries of rr,
   so the scatter for those is started first and completed only after the
   locally available scalings have been applied (communication/computation
   overlap).
*/
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         a = aij->A, b = aij->B;
  PetscInt    s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    /* left-scale the off-diagonal block (rows are local, no communication needed) */
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale  the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2009 
2010 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2011 {
2012   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2013 
2014   PetscFunctionBegin;
2015   PetscCall(MatSetUnfactored(a->A));
2016   PetscFunctionReturn(PETSC_SUCCESS);
2017 }
2018 
2019 PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2020 {
2021   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2022   Mat         a, b, c, d;
2023   PetscBool   flg;
2024 
2025   PetscFunctionBegin;
2026   a = matA->A;
2027   b = matA->B;
2028   c = matB->A;
2029   d = matB->B;
2030 
2031   PetscCall(MatEqual(a, c, &flg));
2032   if (flg) PetscCall(MatEqual(b, d, &flg));
2033   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2034   PetscFunctionReturn(PETSC_SUCCESS);
2035 }
2036 
2037 PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2038 {
2039   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2040   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2041 
2042   PetscFunctionBegin;
2043   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2044   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2045     /* because of the column compression in the off-processor part of the matrix a->B,
2046        the number of columns in a->B and b->B may be different, hence we cannot call
2047        the MatCopy() directly on the two parts. If need be, we can provide a more
2048        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2049        then copying the submatrices */
2050     PetscCall(MatCopy_Basic(A, B, str));
2051   } else {
2052     PetscCall(MatCopy(a->A, b->A, str));
2053     PetscCall(MatCopy(a->B, b->B, str));
2054   }
2055   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2056   PetscFunctionReturn(PETSC_SUCCESS);
2057 }
2058 
2059 /*
2060    Computes the number of nonzeros per row needed for preallocation when X and Y
2061    have different nonzero structure.
2062 */
2063 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2064 {
2065   PetscInt i, j, k, nzx, nzy;
2066 
2067   PetscFunctionBegin;
2068   /* Set the number of nonzeros in the new matrix */
2069   for (i = 0; i < m; i++) {
2070     const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i];
2071     nzx    = xi[i + 1] - xi[i];
2072     nzy    = yi[i + 1] - yi[i];
2073     nnz[i] = 0;
2074     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2075       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2076       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2077       nnz[i]++;
2078     }
2079     for (; k < nzy; k++) nnz[i]++;
2080   }
2081   PetscFunctionReturn(PETSC_SUCCESS);
2082 }
2083 
2084 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2085 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2086 {
2087   PetscInt    m = Y->rmap->N;
2088   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2089   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2090 
2091   PetscFunctionBegin;
2092   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2093   PetscFunctionReturn(PETSC_SUCCESS);
2094 }
2095 
/*
   MatAXPY_MPIAIJ - Computes Y = a*X + Y.

   SAME_NONZERO_PATTERN is done blockwise; SUBSET_NONZERO_PATTERN falls back to
   the generic routine; otherwise a new matrix with the union sparsity of X and
   Y is preallocated, filled, and swapped into Y via MatHeaderMerge().
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscCall(MatAXPY(yy->A, a, xx->A, str));
    PetscCall(MatAXPY(yy->B, a, xx->B, str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o; /* per-row union counts for diagonal/off-diagonal blocks */

    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
    PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
    /* diagonal blocks share local column numbering; off-diagonal blocks need
       their garray local-to-global maps for the comparison */
    PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    /* replace Y's data with B's, keeping the Y handle valid for callers */
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2126 
2127 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2128 
2129 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2130 {
2131   PetscFunctionBegin;
2132   if (PetscDefined(USE_COMPLEX)) {
2133     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2134 
2135     PetscCall(MatConjugate_SeqAIJ(aij->A));
2136     PetscCall(MatConjugate_SeqAIJ(aij->B));
2137   }
2138   PetscFunctionReturn(PETSC_SUCCESS);
2139 }
2140 
2141 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2142 {
2143   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2144 
2145   PetscFunctionBegin;
2146   PetscCall(MatRealPart(a->A));
2147   PetscCall(MatRealPart(a->B));
2148   PetscFunctionReturn(PETSC_SUCCESS);
2149 }
2150 
2151 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2152 {
2153   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2154 
2155   PetscFunctionBegin;
2156   PetscCall(MatImaginaryPart(a->A));
2157   PetscCall(MatImaginaryPart(a->B));
2158   PetscFunctionReturn(PETSC_SUCCESS);
2159 }
2160 
2161 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2162 {
2163   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2164   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2165   PetscScalar       *va, *vv;
2166   Vec                vB, vA;
2167   const PetscScalar *vb;
2168 
2169   PetscFunctionBegin;
2170   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
2171   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2172 
2173   PetscCall(VecGetArrayWrite(vA, &va));
2174   if (idx) {
2175     for (i = 0; i < m; i++) {
2176       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2177     }
2178   }
2179 
2180   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
2181   PetscCall(PetscMalloc1(m, &idxb));
2182   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2183 
2184   PetscCall(VecGetArrayWrite(v, &vv));
2185   PetscCall(VecGetArrayRead(vB, &vb));
2186   for (i = 0; i < m; i++) {
2187     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2188       vv[i] = vb[i];
2189       if (idx) idx[i] = a->garray[idxb[i]];
2190     } else {
2191       vv[i] = va[i];
2192       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2193     }
2194   }
2195   PetscCall(VecRestoreArrayWrite(vA, &vv));
2196   PetscCall(VecRestoreArrayWrite(vA, &va));
2197   PetscCall(VecRestoreArrayRead(vB, &vb));
2198   PetscCall(PetscFree(idxb));
2199   PetscCall(VecDestroy(&vA));
2200   PetscCall(VecDestroy(&vB));
2201   PetscFunctionReturn(PETSC_SUCCESS);
2202 }
2203 
/*
  MatGetRowMinAbs_MPIAIJ - for each locally owned row, v[r] receives the entry
  of smallest absolute value, counting implicit zeros (columns with no stored
  entry), and idx[r] (if non-NULL) its global column number.

  The off-diagonal block B stores columns in compressed form: b->j[] indexes
  into cmap (= garray), which maps compressed columns to global columns.
*/
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* All columns are local: delegate to the sequential diagonal block, writing directly into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* No local columns on this rank: per the comment above, B is assumed empty too,
       so every row's min abs is the implicit 0.0 (NOTE(review): confirm B is empty in this regime) */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = 0.0;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zeros; seed with the first stored entry */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse: an implicit 0.0 exists, so the off-diagonal min abs is at most 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): these comparisons mix the intra-row position j with the global cstart;
         this matches long-standing behavior but looks suspect -- verify against upstream */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          /* empty row: the first off-diagonal column is either 0 or just past the diagonal block */
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the diagonal-block column range */
        }
      }
    }

    /* Scan the stored entries of this row; keep whichever beats the current candidate */
    for (j = 0; j < ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Merge diagonal-block and off-diagonal-block results; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r]; /* diag block indices are local; shift to global */
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2319 
/*
  MatGetRowMin_MPIAIJ - for each locally owned row, v[r] receives the minimum
  entry (compared by real part), counting implicit zeros in the off-diagonal
  block, and idx[r] (if non-NULL) its global column number.

  Mirrors MatGetRowMinAbs_MPIAIJ/MatGetRowMax_MPIAIJ; only the comparison and
  the empty-row fill value (PETSC_MAX_REAL) differ.
*/
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* All columns are local: delegate to the sequential diagonal block, writing directly into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* No local columns: rows are treated as empty (fill with PETSC_MAX_REAL, index -1);
       NOTE(review): assumes B holds no entries in this regime -- confirm */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zeros; seed with the first stored entry */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse: an implicit 0.0 exists, so the off-diagonal minimum is at most 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): these comparisons mix the intra-row position j with the global cstart;
         this matches long-standing behavior but looks suspect -- verify against upstream */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          /* empty row: the first off-diagonal column is either 0 or just past the diagonal block */
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the diagonal-block column range */
        }
      }
    }

    /* Scan the stored entries of this row; keep whichever beats the current candidate (by real part) */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Merge diagonal-block and off-diagonal-block results; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r]; /* diag block indices are local; shift to global */
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2435 
/*
  MatGetRowMax_MPIAIJ - for each locally owned row, v[r] receives the maximum
  entry (compared by real part), counting implicit zeros in the off-diagonal
  block, and idx[r] (if non-NULL) its global column number.

  Mirrors MatGetRowMin_MPIAIJ; only the comparison direction and the
  empty-row fill value (PETSC_MIN_REAL) differ.
*/
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* All columns are local: delegate to the sequential diagonal block, writing directly into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* No local columns: rows are treated as empty (fill with PETSC_MIN_REAL, index -1);
       NOTE(review): assumes B holds no entries in this regime -- confirm */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zeros; seed with the first stored entry */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): these comparisons mix the intra-row position j with the global cstart;
         this matches long-standing behavior but looks suspect -- verify against upstream */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          /* empty row: the first off-diagonal column is either 0 or just past the diagonal block */
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the diagonal-block column range */
        }
      }
    }

    /* Scan the stored entries of this row; keep whichever beats the current candidate (by real part) */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Merge diagonal-block and off-diagonal-block results; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r]; /* diag block indices are local; shift to global */
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2551 
2552 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2553 {
2554   Mat *dummy;
2555 
2556   PetscFunctionBegin;
2557   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2558   *newmat = *dummy;
2559   PetscCall(PetscFree(dummy));
2560   PetscFunctionReturn(PETSC_SUCCESS);
2561 }
2562 
2563 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2564 {
2565   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2566 
2567   PetscFunctionBegin;
2568   PetscCall(MatInvertBlockDiagonal(a->A, values));
2569   A->factorerrortype = a->A->factorerrortype;
2570   PetscFunctionReturn(PETSC_SUCCESS);
2571 }
2572 
2573 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2574 {
2575   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2576 
2577   PetscFunctionBegin;
2578   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2579   PetscCall(MatSetRandom(aij->A, rctx));
2580   if (x->assembled) {
2581     PetscCall(MatSetRandom(aij->B, rctx));
2582   } else {
2583     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2584   }
2585   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2586   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2587   PetscFunctionReturn(PETSC_SUCCESS);
2588 }
2589 
2590 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2591 {
2592   PetscFunctionBegin;
2593   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2594   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2595   PetscFunctionReturn(PETSC_SUCCESS);
2596 }
2597 
2598 /*@
2599    MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2600 
2601    Not Collective
2602 
2603    Input Parameter:
2604 .    A - the matrix
2605 
2606    Output Parameter:
2607 .    nz - the number of nonzeros
2608 
2609  Level: advanced
2610 
.seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`
2612 @*/
2613 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2614 {
2615   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2616   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2617   PetscBool   isaij;
2618 
2619   PetscFunctionBegin;
2620   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
2621   PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
2622   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2623   PetscFunctionReturn(PETSC_SUCCESS);
2624 }
2625 
2626 /*@
   MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2628 
2629    Collective
2630 
2631    Input Parameters:
2632 +    A - the matrix
2633 -    sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)
2634 
2635  Level: advanced
2636 
.seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`
2638 @*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  /* Dispatch to the type-specific implementation if one is composed on A; silently a no-op otherwise */
  PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2645 
2646 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
2647 {
2648   PetscBool sc = PETSC_FALSE, flg;
2649 
2650   PetscFunctionBegin;
2651   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2652   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2653   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2654   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2655   PetscOptionsHeadEnd();
2656   PetscFunctionReturn(PETSC_SUCCESS);
2657 }
2658 
2659 PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
2660 {
2661   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
2662   Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;
2663 
2664   PetscFunctionBegin;
2665   if (!Y->preallocated) {
2666     PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
2667   } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
2668     PetscInt nonew = aij->nonew;
2669     PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
2670     aij->nonew = nonew;
2671   }
2672   PetscCall(MatShift_Basic(Y, a));
2673   PetscFunctionReturn(PETSC_SUCCESS);
2674 }
2675 
2676 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2677 {
2678   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2679 
2680   PetscFunctionBegin;
2681   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2682   PetscCall(MatMissingDiagonal(a->A, missing, d));
2683   if (d) {
2684     PetscInt rstart;
2685     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2686     *d += rstart;
2687   }
2688   PetscFunctionReturn(PETSC_SUCCESS);
2689 }
2690 
2691 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2692 {
2693   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2694 
2695   PetscFunctionBegin;
2696   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2697   PetscFunctionReturn(PETSC_SUCCESS);
2698 }
2699 
2700 PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A)
2701 {
2702   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2703 
2704   PetscFunctionBegin;
2705   PetscCall(MatEliminateZeros(a->A));
2706   PetscCall(MatEliminateZeros(a->B));
2707   PetscFunctionReturn(PETSC_SUCCESS);
2708 }
2709 
/*
  Virtual method table for MATMPIAIJ. Entries are positional: slot k corresponds
  to member k of struct _MatOps (the numbered comments label every fifth slot).
  A NULL slot means the operation is unsupported or handled generically.
  Do NOT reorder entries; only replace NULL with an implementation in place.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIAIJ};
2862 
2863 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2864 {
2865   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2866 
2867   PetscFunctionBegin;
2868   PetscCall(MatStoreValues(aij->A));
2869   PetscCall(MatStoreValues(aij->B));
2870   PetscFunctionReturn(PETSC_SUCCESS);
2871 }
2872 
2873 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2874 {
2875   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2876 
2877   PetscFunctionBegin;
2878   PetscCall(MatRetrieveValues(aij->A));
2879   PetscCall(MatRetrieveValues(aij->B));
2880   PetscFunctionReturn(PETSC_SUCCESS);
2881 }
2882 
/*
  MatMPIAIJSetPreallocation_MPIAIJ - sets the nonzero preallocation of the diagonal (A)
  and off-diagonal (B) sequential blocks of a MATMPIAIJ matrix.

  Input Parameters:
+ B     - the MATMPIAIJ matrix
. d_nz  - nonzeros per row of the diagonal block (used when d_nnz is NULL)
. d_nnz - optional array of per-row nonzero counts for the diagonal block
. o_nz  - nonzeros per row of the off-diagonal block (used when o_nnz is NULL)
- o_nnz - optional array of per-row nonzero counts for the off-diagonal block

  Any previously built state (column map, garray, local vector, scatter, and the
  A/B blocks themselves) is destroyed and rebuilt from scratch.
*/
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscMPIInt size;

  PetscFunctionBegin;
  if (B->hash_active) {
    /* matrix was in hash-based (unpreallocated) insertion mode; restore the cached ops table */
    PetscCall(PetscMemcpy(&B->ops, &b->cops, sizeof(*(B->ops))));
    B->hash_active = PETSC_FALSE;
  }
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

  /* discard any previously built global-to-local column map and communication objects */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* recreate the off-diagonal block; on a single rank it has zero columns */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));

  /* recreate the diagonal block with the local row/column sizes of B */
  PetscCall(MatDestroy(&b->A));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
  PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
  PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
  PetscCall(MatSetType(b->A, MATSEQAIJ));

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2925 
/*
  MatResetPreallocation_MPIAIJ - resets the preallocation of both sequential blocks so the
  matrix can be refilled with values (keeping the previously preallocated pattern capacity).

  Unlike MatMPIAIJSetPreallocation_MPIAIJ(), the A and B blocks are kept and reset in place;
  only the derived communication structures (colmap, garray, lvec, Mvctx) are discarded,
  to be rebuilt at the next assembly.
*/
PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

  /* throw away the global-to-local column map and communication objects */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2951 
/*
  MatDuplicate_MPIAIJ - duplicates a MATMPIAIJ matrix, copying its layout, internal
  sequential blocks, column maps and scatter context.

  Input Parameters:
+ matin    - matrix to duplicate
- cpvalues - MAT_COPY_VALUES or MAT_DO_NOT_COPY_VALUES, forwarded to MatDuplicate()
             of the A and B blocks

  Output Parameter:
. newmat - the duplicate
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat         mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  /* copy top-level state flags; insertion mode always starts out unset */
  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  /* per-implementation state; row-access scratch arrays are not copied, they are
     recreated lazily by MatGetRow */
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  /* share the layouts by reference rather than copying them */
  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  /* deep-copy the global-to-local column map, if it exists */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
  } else a->colmap = NULL;
  /* deep-copy garray (global column indices of the off-diagonal block) */
  if (oldmat->garray) {
    PetscInt len;
    len = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len + 1, &a->garray));
    if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
  if (oldmat->Mvctx) PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
3007 
3008 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3009 {
3010   PetscBool isbinary, ishdf5;
3011 
3012   PetscFunctionBegin;
3013   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3014   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3015   /* force binary viewer to load .info file if it has not yet done so */
3016   PetscCall(PetscViewerSetUp(viewer));
3017   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3018   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3019   if (isbinary) {
3020     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3021   } else if (ishdf5) {
3022 #if defined(PETSC_HAVE_HDF5)
3023     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3024 #else
3025     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3026 #endif
3027   } else {
3028     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3029   }
3030   PetscFunctionReturn(PETSC_SUCCESS);
3031 }
3032 
/*
  MatLoad_MPIAIJ_Binary - loads a MATMPIAIJ matrix from a PETSc binary viewer.

  The file layout is: header (classid, M, N, nz), then the per-row nonzero counts,
  then all column indices, then all values. The reads of row lengths, column indices
  and values are collective and must occur in exactly this order on all ranks.
*/
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  /* a negative nz marks a special on-disk format that this loader cannot handle */
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  /* prefix-sum the row lengths into CSR row offsets */
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  /* cross-check: total of local row lengths must equal the nonzero count in the header */
  PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3082 
3083 /* Not scalable because of ISAllGather() unless getting all columns. */
3084 PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3085 {
3086   IS          iscol_local;
3087   PetscBool   isstride;
3088   PetscMPIInt lisstride = 0, gisstride;
3089 
3090   PetscFunctionBegin;
3091   /* check if we are grabbing all columns*/
3092   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3093 
3094   if (isstride) {
3095     PetscInt start, len, mstart, mlen;
3096     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3097     PetscCall(ISGetLocalSize(iscol, &len));
3098     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3099     if (mstart == start && mlen - mstart == len) lisstride = 1;
3100   }
3101 
3102   PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3103   if (gisstride) {
3104     PetscInt N;
3105     PetscCall(MatGetSize(mat, NULL, &N));
3106     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3107     PetscCall(ISSetIdentity(iscol_local));
3108     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3109   } else {
3110     PetscInt cbs;
3111     PetscCall(ISGetBlockSize(iscol, &cbs));
3112     PetscCall(ISAllGather(iscol, &iscol_local));
3113     PetscCall(ISSetBlockSize(iscol_local, cbs));
3114   }
3115 
3116   *isseq = iscol_local;
3117   PetscFunctionReturn(PETSC_SUCCESS);
3118 }
3119 
3120 /*
3121  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3122  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3123 
3124  Input Parameters:
3125 +   mat - matrix
+   isrow - parallel row index set; its local indices are a subset of local rows of `mat`,
           i.e., mat->rstart <= isrow[i] < mat->rend
3128 -   iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
3129            i.e., mat->cstart <= iscol[i] < mat->cend
3130 
3131  Output Parameters:
3132 +   isrow_d - sequential row index set for retrieving mat->A
.   iscol_d - sequential column index set for retrieving mat->A
3134 .   iscol_o - sequential column index set for retrieving mat->B
3135 -   garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
3136  */
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
{
  Vec             x, cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray, *cmaparray;
  PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
  Mat             B    = a->B;
  Vec             lvec = a->lvec, lcmap;
  PetscInt        i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm        comm;
  VecScatter      Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0));
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices: exclusive prefix sum of the local iscol sizes */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  /* mark the selected columns in x (with their global index) and in cmap (with their
     position in the submatrix); build the local indices for iscol_d along the way */
  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
    idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d; idx ownership transfers to the IS */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i));
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d: local row indices relative to the start of this rank's row block */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices: every ghost column whose scattered x entry is not the
     -1 padding was selected by some rank's iscol */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  for (i = 0; i < Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  /* caller takes ownership of cmap1 and must PetscFree() it */
  *garray = cmap1;

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3233 
/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat         M = NULL;
  MPI_Comm    comm;
  IS          iscol_d, isrow_d, iscol_o;
  Mat         Asub = NULL, Bsub = NULL;
  PetscInt    n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat (composed during the
       MAT_INITIAL_MATRIX call below) */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));

    /* Create submatrix M; takes ownership of Asub, destroys Bsub */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ *)M->data;

    PetscCall(ISGetLocalSize(iscol_o, &BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt        i, j, *idx_new, *subgarray = asub->garray;
      PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));

      /* walk the (sorted) garrays in lockstep, keeping only the iscol_o entries whose
         global column survived the condensation */
      PetscCall(PetscMalloc1(n, &idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o, &idx));
      for (i = 0; i < n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o, &idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3326 
/*
  MatCreateSubMatrix_MPIAIJ - extracts a parallel submatrix given by (isrow, iscol).

  Dispatches to the most scalable implementation available: if isrow (and possibly
  iscol) have the same processor distribution as mat, the specialized SameRowColDist /
  SameRowDist paths are used; otherwise it falls back to the nonscalable path that
  gathers iscol onto every rank.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* the composed objects record which path created *newmat the first time */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      /* all local indices must fall inside this rank's row ownership range */
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol, &i, &j));
      PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* both conditions must hold on every rank for the fast paths to apply */
    PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
    PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
        PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol, &i));
        PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);

        PetscCall(ISSorted(iscol_local, &sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
        /* unsorted iscol_local: fall through to the general path below, reusing iscol_local */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
  }

  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* keep iscol_local alive on the submatrix for future MAT_REUSE_MATRIX calls */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3426 
3427 /*@C
3428      MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3429          and "off-diagonal" part of the matrix in CSR format.
3430 
3431    Collective
3432 
3433    Input Parameters:
3434 +  comm - MPI communicator
3435 .  A - "diagonal" portion of matrix
3436 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3437 -  garray - global index of `B` columns
3438 
3439    Output Parameter:
3440 .   mat - the matrix, with input `A` as its local diagonal matrix
3441 
3442   Level: advanced
3443 
3444    Notes:
3445    See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3446 
3447    `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore.
3448 
3449 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3450 @*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat */
  PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat */
  maij->A = A;

  /* translate B's local column indices to global indices in place, using garray */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew shares B's i/j/a arrays */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* transfer ownership of the shared arrays from B to Bnew before destroying B,
     so MatDestroy(&B) does not free them */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3521 
3522 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3523 
3524 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
3525 {
3526   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
3527   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3528   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
3529   Mat             M, Msub, B = a->B;
3530   MatScalar      *aa;
3531   Mat_SeqAIJ     *aij;
3532   PetscInt       *garray = a->garray, *colsub, Ncols;
3533   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
3534   IS              iscol_sub, iscmap;
3535   const PetscInt *is_idx, *cmap;
3536   PetscBool       allcolumns = PETSC_FALSE;
3537   MPI_Comm        comm;
3538 
3539   PetscFunctionBegin;
3540   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3541   if (call == MAT_REUSE_MATRIX) {
3542     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3543     PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
3544     PetscCall(ISGetLocalSize(iscol_sub, &count));
3545 
3546     PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
3547     PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");
3548 
3549     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
3550     PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3551 
3552     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
3553 
3554   } else { /* call == MAT_INITIAL_MATRIX) */
3555     PetscBool flg;
3556 
3557     PetscCall(ISGetLocalSize(iscol, &n));
3558     PetscCall(ISGetSize(iscol, &Ncols));
3559 
3560     /* (1) iscol -> nonscalable iscol_local */
3561     /* Check for special case: each processor gets entire matrix columns */
3562     PetscCall(ISIdentity(iscol_local, &flg));
3563     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3564     PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3565     if (allcolumns) {
3566       iscol_sub = iscol_local;
3567       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3568       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
3569 
3570     } else {
3571       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3572       PetscInt *idx, *cmap1, k;
3573       PetscCall(PetscMalloc1(Ncols, &idx));
3574       PetscCall(PetscMalloc1(Ncols, &cmap1));
3575       PetscCall(ISGetIndices(iscol_local, &is_idx));
3576       count = 0;
3577       k     = 0;
3578       for (i = 0; i < Ncols; i++) {
3579         j = is_idx[i];
3580         if (j >= cstart && j < cend) {
3581           /* diagonal part of mat */
3582           idx[count]     = j;
3583           cmap1[count++] = i; /* column index in submat */
3584         } else if (Bn) {
3585           /* off-diagonal part of mat */
3586           if (j == garray[k]) {
3587             idx[count]     = j;
3588             cmap1[count++] = i; /* column index in submat */
3589           } else if (j > garray[k]) {
3590             while (j > garray[k] && k < Bn - 1) k++;
3591             if (j == garray[k]) {
3592               idx[count]     = j;
3593               cmap1[count++] = i; /* column index in submat */
3594             }
3595           }
3596         }
3597       }
3598       PetscCall(ISRestoreIndices(iscol_local, &is_idx));
3599 
3600       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
3601       PetscCall(ISGetBlockSize(iscol, &cbs));
3602       PetscCall(ISSetBlockSize(iscol_sub, cbs));
3603 
3604       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
3605     }
3606 
3607     /* (3) Create sequential Msub */
3608     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
3609   }
3610 
3611   PetscCall(ISGetLocalSize(iscol_sub, &count));
3612   aij = (Mat_SeqAIJ *)(Msub)->data;
3613   ii  = aij->i;
3614   PetscCall(ISGetIndices(iscmap, &cmap));
3615 
3616   /*
3617       m - number of local rows
3618       Ncols - number of columns (same on all processors)
3619       rstart - first row in new global matrix generated
3620   */
3621   PetscCall(MatGetSize(Msub, &m, NULL));
3622 
3623   if (call == MAT_INITIAL_MATRIX) {
3624     /* (4) Create parallel newmat */
3625     PetscMPIInt rank, size;
3626     PetscInt    csize;
3627 
3628     PetscCallMPI(MPI_Comm_size(comm, &size));
3629     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3630 
3631     /*
3632         Determine the number of non-zeros in the diagonal and off-diagonal
3633         portions of the matrix in order to do correct preallocation
3634     */
3635 
3636     /* first get start and end of "diagonal" columns */
3637     PetscCall(ISGetLocalSize(iscol, &csize));
3638     if (csize == PETSC_DECIDE) {
3639       PetscCall(ISGetSize(isrow, &mglobal));
3640       if (mglobal == Ncols) { /* square matrix */
3641         nlocal = m;
3642       } else {
3643         nlocal = Ncols / size + ((Ncols % size) > rank);
3644       }
3645     } else {
3646       nlocal = csize;
3647     }
3648     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3649     rstart = rend - nlocal;
3650     PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);
3651 
3652     /* next, compute all the lengths */
3653     jj = aij->j;
3654     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3655     olens = dlens + m;
3656     for (i = 0; i < m; i++) {
3657       jend = ii[i + 1] - ii[i];
3658       olen = 0;
3659       dlen = 0;
3660       for (j = 0; j < jend; j++) {
3661         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3662         else dlen++;
3663         jj++;
3664       }
3665       olens[i] = olen;
3666       dlens[i] = dlen;
3667     }
3668 
3669     PetscCall(ISGetBlockSize(isrow, &bs));
3670     PetscCall(ISGetBlockSize(iscol, &cbs));
3671 
3672     PetscCall(MatCreate(comm, &M));
3673     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
3674     PetscCall(MatSetBlockSizes(M, bs, cbs));
3675     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3676     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3677     PetscCall(PetscFree(dlens));
3678 
3679   } else { /* call == MAT_REUSE_MATRIX */
3680     M = *newmat;
3681     PetscCall(MatGetLocalSize(M, &i, NULL));
3682     PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3683     PetscCall(MatZeroEntries(M));
3684     /*
3685          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3686        rather than the slower MatSetValues().
3687     */
3688     M->was_assembled = PETSC_TRUE;
3689     M->assembled     = PETSC_FALSE;
3690   }
3691 
3692   /* (5) Set values of Msub to *newmat */
3693   PetscCall(PetscMalloc1(count, &colsub));
3694   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
3695 
3696   jj = aij->j;
3697   PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
3698   for (i = 0; i < m; i++) {
3699     row = rstart + i;
3700     nz  = ii[i + 1] - ii[i];
3701     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
3702     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
3703     jj += nz;
3704     aa += nz;
3705   }
3706   PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
3707   PetscCall(ISRestoreIndices(iscmap, &cmap));
3708 
3709   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3710   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3711 
3712   PetscCall(PetscFree(colsub));
3713 
3714   /* save Msub, iscol_sub and iscmap used in processor for next request */
3715   if (call == MAT_INITIAL_MATRIX) {
3716     *newmat = M;
3717     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub));
3718     PetscCall(MatDestroy(&Msub));
3719 
3720     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub));
3721     PetscCall(ISDestroy(&iscol_sub));
3722 
3723     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap));
3724     PetscCall(ISDestroy(&iscmap));
3725 
3726     if (iscol_local) {
3727       PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local));
3728       PetscCall(ISDestroy(&iscol_local));
3729     }
3730   }
3731   PetscFunctionReturn(PETSC_SUCCESS);
3732 }
3733 
3734 /*
3735     Not great since it makes two copies of the submatrix, first an SeqAIJ
3736   in local and then by concatenating the local matrices the end result.
3737   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3738 
3739   This requires a sequential iscol with all indices.
3740 */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* allcolumns feeds a collective call below, so it must agree on every rank:
     take the fast all-columns path only if EVERY rank requested all columns */
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve the sequential submatrix stashed on *newmat by a previous
       MAT_INITIAL_MATRIX call (see the PetscObjectCompose() at the bottom) */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* spread the columns as evenly as possible; first (n % size) ranks get one extra */
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum of the local column counts gives this rank's column ownership range [rstart,rend) */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens; freed together below */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        /* columns outside [rstart,rend) land in the off-diagonal block */
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  /* insert the sequential submatrix row by row; jj/aa walk the CSR arrays of Mreuse */
  for (i = 0; i < m; i++) {
    row   = rstart + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj += nz;
    vwork = aa;
    aa += nz;
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse)); /* the composition above holds a reference; drop ours */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3867 
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
  PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
  const PetscInt *JJ;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);

  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  m      = B->rmap->n;      /* number of local rows supplied by the CSR input */
  cstart = B->cmap->rstart; /* [cstart,cend) is the global column range of the diagonal block */
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart; /* first global row owned by this rank */

  PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));

  if (PetscDefined(USE_DEBUG)) {
    /* Sanity-check the CSR structure. Only the first and last column index of
       each row are range-checked, which fully validates the row only when its
       column indices are sorted */
    for (i = 0; i < m; i++) {
      nnz = Ii[i + 1] - Ii[i];
      JJ  = J + Ii[i];
      PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
      PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
      PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
    }
  }

  /* Split each row's nonzero count into diagonal-block (column in [cstart,cend))
     and off-diagonal parts so the preallocation below is exact */
  for (i = 0; i < m; i++) {
    nnz     = Ii[i + 1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max, nnz); /* NOTE(review): nnz_max is computed but not used below */
    d       = 0;
    for (j = 0; j < nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  /* insert the values row by row; global row index = local row + rstart */
  for (i = 0; i < m; i++) {
    ii = i + rstart;
    PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J + Ii[i], v ? v + Ii[i] : NULL, INSERT_VALUES));
  }
  /* every entry was set in a locally owned row, so assembly needs no off-process communication */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* count number of entries below block diagonal */
  /* ld[i] = number of entries in row i with column < cstart (assumes sorted columns);
     consumed later by MatUpdateMPIAIJWithArray()/MatUpdateMPIAIJWithArrays() */
  PetscCall(PetscFree(Aij->ld));
  PetscCall(PetscCalloc1(m, &ld));
  Aij->ld = ld;
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];
    j   = 0;
    while (j < nnz && J[j] < cstart) j++;
    ld[i] = j;
    J += nnz; /* advance J to the start of the next row */
  }

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3937 
3938 /*@
3939    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3940    (the default parallel PETSc format).
3941 
3942    Collective
3943 
3944    Input Parameters:
3945 +  B - the matrix
3946 .  i - the indices into j for the start of each local row (starts with zero)
3947 .  j - the column indices for each local row (starts with zero)
3948 -  v - optional values in the matrix
3949 
3950    Level: developer
3951 
3952    Notes:
3953        The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
3954      thus you CANNOT change the matrix entries by changing the values of `v` after you have
3955      called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3956 
3957        The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
3958 
3959        The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering, i.e., for the following matrix, the input data expected is
3961     as shown
3962 
3963 .vb
3964         1 0 0
3965         2 0 3     P0
3966        -------
3967         4 5 6     P1
3968 
3969      Process0 [P0] rows_owned=[0,1]
3970         i =  {0,1,3}  [size = nrow+1  = 2+1]
3971         j =  {0,0,2}  [size = 3]
3972         v =  {1,2,3}  [size = 3]
3973 
3974      Process1 [P1] rows_owned=[2]
3975         i =  {0,3}    [size = nrow+1  = 1+1]
3976         j =  {0,1,2}  [size = 3]
3977         v =  {4,5,6}  [size = 3]
3978 .ve
3979 
3980 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
3981           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
3982 @*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* Dispatch to the implementation composed on B (e.g. MatMPIAIJSetPreallocationCSR_MPIAIJ);
     silently a no-op if B's type does not provide "MatMPIAIJSetPreallocationCSR_C" */
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3989 
3990 /*@C
3991    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
3992    (the default parallel PETSc format).  For good matrix assembly performance
3993    the user should preallocate the matrix storage by setting the parameters
3994    `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
3995 
3996    Collective
3997 
3998    Input Parameters:
3999 +  B - the matrix
4000 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4001            (same value is used for all local rows)
4002 .  d_nnz - array containing the number of nonzeros in the various rows of the
4003            DIAGONAL portion of the local submatrix (possibly different for each row)
4004            or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4005            The size of this array is equal to the number of local rows, i.e 'm'.
4006            For matrices that will be factored, you must leave room for (and set)
4007            the diagonal entry even if it is zero.
4008 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4009            submatrix (same value is used for all local rows).
4010 -  o_nnz - array containing the number of nonzeros in the various rows of the
4011            OFF-DIAGONAL portion of the local submatrix (possibly different for
4012            each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4013            structure. The size of this array is equal to the number
4014            of local rows, i.e 'm'.
4015 
4016    Usage:
4017    Consider the following 8x8 matrix with 34 non-zero values, that is
4018    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4019    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4020    as follows
4021 
4022 .vb
4023             1  2  0  |  0  3  0  |  0  4
4024     Proc0   0  5  6  |  7  0  0  |  8  0
4025             9  0 10  | 11  0  0  | 12  0
4026     -------------------------------------
4027            13  0 14  | 15 16 17  |  0  0
4028     Proc1   0 18  0  | 19 20 21  |  0  0
4029             0  0  0  | 22 23  0  | 24  0
4030     -------------------------------------
4031     Proc2  25 26 27  |  0  0 28  | 29  0
4032            30  0  0  | 31 32 33  |  0 34
4033 .ve
4034 
4035    This can be represented as a collection of submatrices as
4036 .vb
4037       A B C
4038       D E F
4039       G H I
4040 .ve
4041 
4042    Where the submatrices A,B,C are owned by proc0, D,E,F are
4043    owned by proc1, G,H,I are owned by proc2.
4044 
4045    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4046    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4047    The 'M','N' parameters are 8,8, and have the same values on all procs.
4048 
4049    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4050    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4051    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4052    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4053    part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
   matrix, and [DF] as another `MATSEQAIJ` matrix.
4055 
4056    When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4057    allocated for every row of the local diagonal submatrix, and `o_nz`
4058    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose `d_nz` and `o_nz` is to use the maximum nonzeros per local
4060    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4061    In this case, the values of `d_nz`, `o_nz` are
4062 .vb
4063      proc0  dnz = 2, o_nz = 2
4064      proc1  dnz = 3, o_nz = 2
4065      proc2  dnz = 1, o_nz = 4
4066 .ve
4067    We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
4068    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4070    34 values.
4071 
4072    When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4073    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4074    In the above case the values for `d_nnz`, `o_nnz` are
4075 .vb
4076      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4077      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4078      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4079 .ve
4080    Here the space allocated is sum of all the above values i.e 34, and
4081    hence pre-allocation is perfect.
4082 
4083    Level: intermediate
4084 
4085    Notes:
4086    If the *_nnz parameter is given then the *_nz parameter is ignored
4087 
4088    The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4089    storage.  The stored row and column indices begin with zero.
4090    See [Sparse Matrices](sec_matsparse) for details.
4091 
4092    The parallel matrix is partitioned such that the first m0 rows belong to
4093    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4094    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4095 
4096    The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
4098    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4099    first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
4101    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4102    common case of a square matrix, the row and column ranges are the same and
4103    the DIAGONAL part is also square. The remaining portion of the local
4104    submatrix (mxN) constitute the OFF-DIAGONAL portion.
4105 
4106    If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4107 
4108    You can call `MatGetInfo()` to get information on how effective the preallocation was;
4109    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4110    You can also run with the option `-info` and look for messages with the string
4111    malloc in them to see if additional memory allocation was needed.
4112 
4113 .seealso: [](chapter_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4114           `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
4115 @*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* Dispatch to the type-specific implementation composed on B; a no-op for
     matrix types that do not register "MatMPIAIJSetPreallocation_C" */
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4124 
4125 /*@
4126      MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard
4127          CSR format for the local rows.
4128 
4129    Collective
4130 
4131    Input Parameters:
4132 +  comm - MPI communicator
4133 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
4134 .  n - This value should be the same as the local size used in creating the
4135        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4136        calculated if N is given) For square matrices n is almost always m.
4137 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4138 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4139 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4140 .   j - column indices
4141 -   a - optional matrix values
4142 
4143    Output Parameter:
4144 .   mat - the matrix
4145 
4146    Level: intermediate
4147 
4148    Notes:
4149        The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4150      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4151      called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4152 
4153        The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
4154 
4155        The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering, i.e., for the following matrix, the input data expected is
4157     as shown
4158 
4159        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
4160 .vb
4161         1 0 0
4162         2 0 3     P0
4163        -------
4164         4 5 6     P1
4165 
4166      Process0 [P0] rows_owned=[0,1]
4167         i =  {0,1,3}  [size = nrow+1  = 2+1]
4168         j =  {0,0,2}  [size = 3]
4169         v =  {1,2,3}  [size = 3]
4170 
4171      Process1 [P1] rows_owned=[2]
4172         i =  {0,3}    [size = nrow+1  = 1+1]
4173         j =  {0,1,2}  [size = 3]
4174         v =  {4,5,6}  [size = 3]
4175 .ve
4176 
.seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4178           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4179 @*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  /* i may be NULL here; when non-NULL it must be a valid CSR row-offset array starting at 0 */
  PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  /* create an MPIAIJ matrix and hand the CSR data to the preallocation/fill routine,
     which copies i, j, and a into PETSc's internal storage */
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4192 
4193 /*@
4194      MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard
4195      CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed
4196      from `MatCreateMPIAIJWithArrays()`
4197 
4198      Deprecated: Use `MatUpdateMPIAIJWithArray()`
4199 
4200    Collective
4201 
4202    Input Parameters:
4203 +  mat - the matrix
4204 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
4205 .  n - This value should be the same as the local size used in creating the
4206        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4207        calculated if N is given) For square matrices n is almost always m.
4208 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4209 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4210 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4211 .  J - column indices
4212 -  v - matrix values
4213 
4214    Level: deprecated
4215 
4216 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4217           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()`
4218 @*/
4219 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
4220 {
4221   PetscInt        nnz, i;
4222   PetscBool       nooffprocentries;
4223   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4224   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4225   PetscScalar    *ad, *ao;
4226   PetscInt        ldi, Iii, md;
4227   const PetscInt *Adi = Ad->i;
4228   PetscInt       *ld  = Aij->ld;
4229 
4230   PetscFunctionBegin;
4231   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4232   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4233   PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4234   PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4235 
4236   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4237   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4238 
4239   for (i = 0; i < m; i++) {
4240     nnz = Ii[i + 1] - Ii[i];
4241     Iii = Ii[i];
4242     ldi = ld[i];
4243     md  = Adi[i + 1] - Adi[i];
4244     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4245     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4246     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4247     ad += md;
4248     ao += nnz - md;
4249   }
4250   nooffprocentries      = mat->nooffprocentries;
4251   mat->nooffprocentries = PETSC_TRUE;
4252   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4253   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4254   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4255   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4256   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4257   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4258   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4259   mat->nooffprocentries = nooffprocentries;
4260   PetscFunctionReturn(PETSC_SUCCESS);
4261 }
4262 
4263 /*@
4264      MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values
4265 
4266    Collective
4267 
4268    Input Parameters:
4269 +  mat - the matrix
4270 -  v - matrix values, stored by row
4271 
4272    Level: intermediate
4273 
4274    Note:
4275    The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4276 
4277 .seealso: [](chapter_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4279 @*/
4280 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4281 {
4282   PetscInt        nnz, i, m;
4283   PetscBool       nooffprocentries;
4284   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4285   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4286   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4287   PetscScalar    *ad, *ao;
4288   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4289   PetscInt        ldi, Iii, md;
4290   PetscInt       *ld = Aij->ld;
4291 
4292   PetscFunctionBegin;
4293   m = mat->rmap->n;
4294 
4295   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4296   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4297   Iii = 0;
4298   for (i = 0; i < m; i++) {
4299     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4300     ldi = ld[i];
4301     md  = Adi[i + 1] - Adi[i];
4302     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4303     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4304     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4305     ad += md;
4306     ao += nnz - md;
4307     Iii += nnz;
4308   }
4309   nooffprocentries      = mat->nooffprocentries;
4310   mat->nooffprocentries = PETSC_TRUE;
4311   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4312   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4313   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4314   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4315   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4316   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4317   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4318   mat->nooffprocentries = nooffprocentries;
4319   PetscFunctionReturn(PETSC_SUCCESS);
4320 }
4321 
4322 /*@C
4323    MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4324    (the default parallel PETSc format).  For good matrix assembly performance
4325    the user should preallocate the matrix storage by setting the parameters
4326    `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4327 
4328    Collective
4329 
4330    Input Parameters:
4331 +  comm - MPI communicator
4332 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4333            This value should be the same as the local size used in creating the
4334            y vector for the matrix-vector product y = Ax.
4335 .  n - This value should be the same as the local size used in creating the
4336        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4337        calculated if N is given) For square matrices n is almost always m.
4338 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4339 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4340 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4341            (same value is used for all local rows)
4342 .  d_nnz - array containing the number of nonzeros in the various rows of the
4343            DIAGONAL portion of the local submatrix (possibly different for each row)
4344            or `NULL`, if `d_nz` is used to specify the nonzero structure.
4345            The size of this array is equal to the number of local rows, i.e 'm'.
4346 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4347            submatrix (same value is used for all local rows).
4348 -  o_nnz - array containing the number of nonzeros in the various rows of the
4349            OFF-DIAGONAL portion of the local submatrix (possibly different for
4350            each row) or `NULL`, if `o_nz` is used to specify the nonzero
4351            structure. The size of this array is equal to the number
4352            of local rows, i.e 'm'.
4353 
4354    Output Parameter:
4355 .  A - the matrix
4356 
4357    Options Database Keys:
4358 +  -mat_no_inode  - Do not use inodes
4359 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4360 -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4361         See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
4362         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4363 
4364    Level: intermediate
4365 
4366    Notes:
4367    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4368    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4369    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4370 
4371    If the *_nnz parameter is given then the *_nz parameter is ignored
4372 
4373    The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4374    processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4375    storage requirements for this matrix.
4376 
4377    If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one
4378    processor than it must be used on all processors that share the object for
4379    that argument.
4380 
4381    The user MUST specify either the local or global matrix dimensions
4382    (possibly both).
4383 
4384    The parallel matrix is partitioned across processors such that the
4385    first m0 rows belong to process 0, the next m1 rows belong to
4386    process 1, the next m2 rows belong to process 2 etc.. where
4387    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4388    values corresponding to [m x N] submatrix.
4389 
4390    The columns are logically partitioned with the n0 columns belonging
4391    to 0th partition, the next n1 columns belonging to the next
4392    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4393 
4394    The DIAGONAL portion of the local submatrix on any given processor
4395    is the submatrix corresponding to the rows and columns m,n
4396    corresponding to the given processor. i.e diagonal matrix on
4397    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4398    etc. The remaining portion of the local submatrix [m x (N-n)]
4399    constitute the OFF-DIAGONAL portion. The example below better
4400    illustrates this concept.
4401 
4402    For a square global matrix we define each processor's diagonal portion
4403    to be its local rows and the corresponding columns (a square submatrix);
4404    each processor's off-diagonal portion encompasses the remainder of the
4405    local matrix (a rectangular submatrix).
4406 
4407    If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
4408 
4409    When calling this routine with a single process communicator, a matrix of
4410    type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4411    type of communicator, use the construction mechanism
4412 .vb
4413      MatCreate(...,&A);
4414      MatSetType(A,MATMPIAIJ);
4415      MatSetSizes(A, m,n,M,N);
4416      MatMPIAIJSetPreallocation(A,...);
4417 .ve
4418 
4419    By default, this format uses inodes (identical nodes) when possible.
4420    We search for consecutive rows with the same nonzero structure, thereby
4421    reusing matrix information to achieve increased efficiency.
4422 
4423    Usage:
4424    Consider the following 8x8 matrix with 34 non-zero values, that is
4425    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4426    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4427    as follows
4428 
4429 .vb
4430             1  2  0  |  0  3  0  |  0  4
4431     Proc0   0  5  6  |  7  0  0  |  8  0
4432             9  0 10  | 11  0  0  | 12  0
4433     -------------------------------------
4434            13  0 14  | 15 16 17  |  0  0
4435     Proc1   0 18  0  | 19 20 21  |  0  0
4436             0  0  0  | 22 23  0  | 24  0
4437     -------------------------------------
4438     Proc2  25 26 27  |  0  0 28  | 29  0
4439            30  0  0  | 31 32 33  |  0 34
4440 .ve
4441 
4442    This can be represented as a collection of submatrices as
4443 
4444 .vb
4445       A B C
4446       D E F
4447       G H I
4448 .ve
4449 
4450    Where the submatrices A,B,C are owned by proc0, D,E,F are
4451    owned by proc1, G,H,I are owned by proc2.
4452 
4453    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4454    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4455    The 'M','N' parameters are 8,8, and have the same values on all procs.
4456 
4457    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4458    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4459    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4460    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4461    part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
   matrix, and [DF] as another SeqAIJ matrix.
4463 
4464    When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4465    allocated for every row of the local diagonal submatrix, and `o_nz`
4466    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
4468    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4469    In this case, the values of `d_nz`,`o_nz` are
4470 .vb
     proc0  d_nz = 2, o_nz = 2
     proc1  d_nz = 3, o_nz = 2
     proc2  d_nz = 1, o_nz = 4
.ve
   We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
4478    34 values.
4479 
4480    When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4481    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4482    In the above case the values for d_nnz,o_nnz are
4483 .vb
4484      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4485      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4486      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4487 .ve
4488    Here the space allocated is sum of all the above values i.e 34, and
4489    hence pre-allocation is perfect.
4490 
4491 .seealso: [](chapter_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4492           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
4493 @*/
4494 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4495 {
4496   PetscMPIInt size;
4497 
4498   PetscFunctionBegin;
4499   PetscCall(MatCreate(comm, A));
4500   PetscCall(MatSetSizes(*A, m, n, M, N));
4501   PetscCallMPI(MPI_Comm_size(comm, &size));
4502   if (size > 1) {
4503     PetscCall(MatSetType(*A, MATMPIAIJ));
4504     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4505   } else {
4506     PetscCall(MatSetType(*A, MATSEQAIJ));
4507     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4508   }
4509   PetscFunctionReturn(PETSC_SUCCESS);
4510 }
4511 
4512 /*MC
4513     MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix
4514 
4515     Synopsis:
4516     MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4517 
4518     Not Collective
4519 
4520     Input Parameter:
4521 .   A - the `MATMPIAIJ` matrix
4522 
4523     Output Parameters:
4524 +   Ad - the diagonal portion of the matrix
4525 .   Ao - the off diagonal portion of the matrix
4526 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4527 -   ierr - error code
4528 
4529      Level: advanced
4530 
4531     Note:
4532     Use  `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4533 
4534 .seealso: [](chapter_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
4535 M*/
4536 
4537 /*MC
4538     MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4539 
4540     Synopsis:
4541     MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4542 
4543     Not Collective
4544 
4545     Input Parameters:
4546 +   A - the `MATMPIAIJ` matrix
4547 .   Ad - the diagonal portion of the matrix
4548 .   Ao - the off diagonal portion of the matrix
4549 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4550 -   ierr - error code
4551 
4552      Level: advanced
4553 
4554 .seealso: [](chapter_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
4555 M*/
4556 
4557 /*@C
4558   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4559 
4560   Not Collective
4561 
4562   Input Parameter:
4563 . A - The `MATMPIAIJ` matrix
4564 
4565   Output Parameters:
4566 + Ad - The local diagonal block as a `MATSEQAIJ` matrix
4567 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix
4568 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4569 
4570   Level: intermediate
4571 
4572   Note:
4573   The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
4574   in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns are `Ao` are in [0, Nco), where Nco is
4575   the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
4576   local column numbers to global column numbers in the original matrix.
4577 
4578   Fortran Note:
4579   `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()`
4580 
4581 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ`
4582 @*/
4583 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4584 {
4585   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4586   PetscBool   flg;
4587 
4588   PetscFunctionBegin;
4589   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4590   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4591   if (Ad) *Ad = a->A;
4592   if (Ao) *Ao = a->B;
4593   if (colmap) *colmap = a->garray;
4594   PetscFunctionReturn(PETSC_SUCCESS);
4595 }
4596 
/* Stack the rows of each rank's sequential matrix `inmat` into one parallel matrix, in rank order.

   comm   - communicator of the resulting parallel matrix
   inmat  - this rank's sequential (SeqAIJ) contribution; all of its rows become local rows of *outmat
   n      - local column count of the result, or PETSC_DECIDE
   scall  - MAT_INITIAL_MATRIX creates *outmat (symbolic + numeric); MAT_REUSE_MATRIX refills an existing one
   outmat - the assembled parallel matrix */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  PetscInt     m, N, i, rstart, nnz, Ii;
  PetscInt    *indx;
  PetscScalar *values;
  MatType      rootType;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz, *onz, sum, bs, cbs;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    /* global index of this rank's first row: exclusive prefix sum of the local row counts */
    PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
    rstart -= m;

    /* count diagonal-/off-diagonal-block nonzeros per row for preallocation */
    MatPreallocateBegin(comm, m, n, dnz, onz);
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
      PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
    }

    PetscCall(MatCreate(comm, outmat));
    PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
    PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
    /* presumably picks the host/base type of inmat so device subclasses carry over -- see MatGetRootType_Private */
    PetscCall(MatGetRootType_Private(inmat, &rootType));
    PetscCall(MatSetType(*outmat, rootType));
    /* only one of the next two calls takes effect, depending on whether *outmat ended up Seq or MPI */
    PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
    MatPreallocateEnd(dnz, onz);
    /* every rank only sets its own rows, so the off-process stash can be skipped */
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase: copy local rows of inmat into the (new or reused) parallel matrix */
  PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
    Ii = i + rstart;
    PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
  }
  PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4648 
4649 PetscErrorCode MatFileSplit(Mat A, char *outfile)
4650 {
4651   PetscMPIInt        rank;
4652   PetscInt           m, N, i, rstart, nnz;
4653   size_t             len;
4654   const PetscInt    *indx;
4655   PetscViewer        out;
4656   char              *name;
4657   Mat                B;
4658   const PetscScalar *values;
4659 
4660   PetscFunctionBegin;
4661   PetscCall(MatGetLocalSize(A, &m, NULL));
4662   PetscCall(MatGetSize(A, NULL, &N));
4663   /* Should this be the type of the diagonal block of A? */
4664   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
4665   PetscCall(MatSetSizes(B, m, N, m, N));
4666   PetscCall(MatSetBlockSizesFromMats(B, A, A));
4667   PetscCall(MatSetType(B, MATSEQAIJ));
4668   PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
4669   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
4670   for (i = 0; i < m; i++) {
4671     PetscCall(MatGetRow(A, i + rstart, &nnz, &indx, &values));
4672     PetscCall(MatSetValues(B, 1, &i, nnz, indx, values, INSERT_VALUES));
4673     PetscCall(MatRestoreRow(A, i + rstart, &nnz, &indx, &values));
4674   }
4675   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
4676   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
4677 
4678   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
4679   PetscCall(PetscStrlen(outfile, &len));
4680   PetscCall(PetscMalloc1(len + 6, &name));
4681   PetscCall(PetscSNPrintf(name, len + 6, "%s.%d", outfile, rank));
4682   PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, name, FILE_MODE_APPEND, &out));
4683   PetscCall(PetscFree(name));
4684   PetscCall(MatView(B, out));
4685   PetscCall(PetscViewerDestroy(&out));
4686   PetscCall(MatDestroy(&B));
4687   PetscFunctionReturn(PETSC_SUCCESS);
4688 }
4689 
4690 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4691 {
4692   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4693 
4694   PetscFunctionBegin;
4695   if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
4696   PetscCall(PetscFree(merge->id_r));
4697   PetscCall(PetscFree(merge->len_s));
4698   PetscCall(PetscFree(merge->len_r));
4699   PetscCall(PetscFree(merge->bi));
4700   PetscCall(PetscFree(merge->bj));
4701   PetscCall(PetscFree(merge->buf_ri[0]));
4702   PetscCall(PetscFree(merge->buf_ri));
4703   PetscCall(PetscFree(merge->buf_rj[0]));
4704   PetscCall(PetscFree(merge->buf_rj));
4705   PetscCall(PetscFree(merge->coi));
4706   PetscCall(PetscFree(merge->coj));
4707   PetscCall(PetscFree(merge->owners_co));
4708   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4709   PetscCall(PetscFree(merge));
4710   PetscFunctionReturn(PETSC_SUCCESS);
4711 }
4712 
4713 #include <../src/mat/utils/freespace.h>
4714 #include <petscbt.h>
4715 
/* Numeric phase of merging per-rank sequential matrices into one parallel matrix.
   mpimat must have been created by MatCreateMPIAIJSumSeqAIJSymbolic(), which attached the
   Mat_Merge_SeqsToMPI container holding the merged ij structure and communication plan;
   this routine only fills in the numerical values from seqmat and assembles mpimat. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
{
  MPI_Comm             comm;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, taga, *len_s;
  PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj;
  PetscInt             proc, m;
  PetscInt           **buf_ri, **buf_rj;
  PetscInt             k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
  PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
  MPI_Request         *s_waits, *r_waits;
  MPI_Status          *status;
  const MatScalar     *aa, *a_a;
  MatScalar          **abuf_r, *ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));

  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* retrieve the merge context stashed on mpimat by the symbolic phase */
  PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container, (void **)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
  aa = a_a;

  /* merged row structure and received i/j structures, all built by the symbolic phase */
  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  PetscCall(PetscMalloc1(size, &status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
  PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));

  /* each non-empty destination gets the contiguous slice of seqmat's values for its row range */
  PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    i = owners[proc];
    PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  PetscCall(PetscMalloc1(N, &ba_i)); /* dense scratch row; N bounds any row's nonzero count */
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i = 0; i < m; i++) {
    arow = owners[rank] + i;
    bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
    bnzi = bi[i + 1] - bi[i];
    PetscCall(PetscArrayzero(ba_i, bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow + 1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* both bj_i and aj are sorted, so a single forward scan matches each acol to its bcol */
    for (j = 0; nextaj < anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k] + 1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j = 0; nextaj < anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++;
        nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
  PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));

  /* receive buffers share one data block at abuf_r[0] */
  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4834 
4835 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4836 {
4837   Mat                  B_mpi;
4838   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4839   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4840   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4841   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4842   PetscInt             len, proc, *dnz, *onz, bs, cbs;
4843   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4844   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4845   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4846   MPI_Status          *status;
4847   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4848   PetscBT              lnkbt;
4849   Mat_Merge_SeqsToMPI *merge;
4850   PetscContainer       container;
4851 
4852   PetscFunctionBegin;
4853   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4854 
4855   /* make sure it is a PETSc comm */
4856   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4857   PetscCallMPI(MPI_Comm_size(comm, &size));
4858   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4859 
4860   PetscCall(PetscNew(&merge));
4861   PetscCall(PetscMalloc1(size, &status));
4862 
4863   /* determine row ownership */
4864   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4865   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4866   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4867   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4868   PetscCall(PetscLayoutSetUp(merge->rowmap));
4869   PetscCall(PetscMalloc1(size, &len_si));
4870   PetscCall(PetscMalloc1(size, &merge->len_s));
4871 
4872   m      = merge->rowmap->n;
4873   owners = merge->rowmap->range;
4874 
4875   /* determine the number of messages to send, their lengths */
4876   len_s = merge->len_s;
4877 
4878   len          = 0; /* length of buf_si[] */
4879   merge->nsend = 0;
4880   for (proc = 0; proc < size; proc++) {
4881     len_si[proc] = 0;
4882     if (proc == rank) {
4883       len_s[proc] = 0;
4884     } else {
4885       len_si[proc] = owners[proc + 1] - owners[proc] + 1;
4886       len_s[proc]  = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4887     }
4888     if (len_s[proc]) {
4889       merge->nsend++;
4890       nrows = 0;
4891       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4892         if (ai[i + 1] > ai[i]) nrows++;
4893       }
4894       len_si[proc] = 2 * (nrows + 1);
4895       len += len_si[proc];
4896     }
4897   }
4898 
4899   /* determine the number and length of messages to receive for ij-structure */
4900   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4901   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4902 
4903   /* post the Irecv of j-structure */
4904   PetscCall(PetscCommGetNewTag(comm, &tagj));
4905   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4906 
4907   /* post the Isend of j-structure */
4908   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4909 
4910   for (proc = 0, k = 0; proc < size; proc++) {
4911     if (!len_s[proc]) continue;
4912     i = owners[proc];
4913     PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4914     k++;
4915   }
4916 
4917   /* receives and sends of j-structure are complete */
4918   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4919   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4920 
4921   /* send and recv i-structure */
4922   PetscCall(PetscCommGetNewTag(comm, &tagi));
4923   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4924 
4925   PetscCall(PetscMalloc1(len + 1, &buf_s));
4926   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4927   for (proc = 0, k = 0; proc < size; proc++) {
4928     if (!len_s[proc]) continue;
4929     /* form outgoing message for i-structure:
4930          buf_si[0]:                 nrows to be sent
4931                [1:nrows]:           row index (global)
4932                [nrows+1:2*nrows+1]: i-structure index
4933     */
4934     nrows       = len_si[proc] / 2 - 1;
4935     buf_si_i    = buf_si + nrows + 1;
4936     buf_si[0]   = nrows;
4937     buf_si_i[0] = 0;
4938     nrows       = 0;
4939     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4940       anzi = ai[i + 1] - ai[i];
4941       if (anzi) {
4942         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4943         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4944         nrows++;
4945       }
4946     }
4947     PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4948     k++;
4949     buf_si += len_si[proc];
4950   }
4951 
4952   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4953   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4954 
4955   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4956   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4957 
4958   PetscCall(PetscFree(len_si));
4959   PetscCall(PetscFree(len_ri));
4960   PetscCall(PetscFree(rj_waits));
4961   PetscCall(PetscFree2(si_waits, sj_waits));
4962   PetscCall(PetscFree(ri_waits));
4963   PetscCall(PetscFree(buf_s));
4964   PetscCall(PetscFree(status));
4965 
4966   /* compute a local seq matrix in each processor */
4967   /* allocate bi array and free space for accumulating nonzero column info */
4968   PetscCall(PetscMalloc1(m + 1, &bi));
4969   bi[0] = 0;
4970 
4971   /* create and initialize a linked list */
4972   nlnk = N + 1;
4973   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
4974 
4975   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4976   len = ai[owners[rank + 1]] - ai[owners[rank]];
4977   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
4978 
4979   current_space = free_space;
4980 
4981   /* determine symbolic info for each local row */
4982   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4983 
4984   for (k = 0; k < merge->nrecv; k++) {
4985     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4986     nrows       = *buf_ri_k[k];
4987     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4988     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4989   }
4990 
4991   MatPreallocateBegin(comm, m, n, dnz, onz);
4992   len = 0;
4993   for (i = 0; i < m; i++) {
4994     bnzi = 0;
4995     /* add local non-zero cols of this proc's seqmat into lnk */
4996     arow = owners[rank] + i;
4997     anzi = ai[arow + 1] - ai[arow];
4998     aj   = a->j + ai[arow];
4999     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5000     bnzi += nlnk;
5001     /* add received col data into lnk */
5002     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
5003       if (i == *nextrow[k]) {            /* i-th row */
5004         anzi = *(nextai[k] + 1) - *nextai[k];
5005         aj   = buf_rj[k] + *nextai[k];
5006         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5007         bnzi += nlnk;
5008         nextrow[k]++;
5009         nextai[k]++;
5010       }
5011     }
5012     if (len < bnzi) len = bnzi; /* =max(bnzi) */
5013 
5014     /* if free space is not available, make more free space */
5015     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
5016     /* copy data into free space, then initialize lnk */
5017     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
5018     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
5019 
5020     current_space->array += bnzi;
5021     current_space->local_used += bnzi;
5022     current_space->local_remaining -= bnzi;
5023 
5024     bi[i + 1] = bi[i] + bnzi;
5025   }
5026 
5027   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
5028 
5029   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
5030   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
5031   PetscCall(PetscLLDestroy(lnk, lnkbt));
5032 
5033   /* create symbolic parallel matrix B_mpi */
5034   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
5035   PetscCall(MatCreate(comm, &B_mpi));
5036   if (n == PETSC_DECIDE) {
5037     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
5038   } else {
5039     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
5040   }
5041   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
5042   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
5043   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5044   MatPreallocateEnd(dnz, onz);
5045   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
5046 
5047   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5048   B_mpi->assembled = PETSC_FALSE;
5049   merge->bi        = bi;
5050   merge->bj        = bj;
5051   merge->buf_ri    = buf_ri;
5052   merge->buf_rj    = buf_rj;
5053   merge->coi       = NULL;
5054   merge->coj       = NULL;
5055   merge->owners_co = NULL;
5056 
5057   PetscCall(PetscCommDestroy(&comm));
5058 
5059   /* attach the supporting struct to B_mpi for reuse */
5060   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5061   PetscCall(PetscContainerSetPointer(container, merge));
5062   PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5063   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5064   PetscCall(PetscContainerDestroy(&container));
5065   *mpimat = B_mpi;
5066 
5067   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5068   PetscFunctionReturn(PETSC_SUCCESS);
5069 }
5070 
5071 /*@C
5072       MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5073                  matrices from each processor
5074 
5075     Collective
5076 
5077    Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix
5080 .    m - number of local rows (or `PETSC_DECIDE`)
5081 .    n - number of local columns (or `PETSC_DECIDE`)
5082 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5083 
5084    Output Parameter:
5085 .    mpimat - the parallel matrix generated
5086 
5087     Level: advanced
5088 
5089    Note:
5090      The dimensions of the sequential matrix in each processor MUST be the same.
5091      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5092      destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat.
5093 
5094 .seealso: [](chapter_matrices), `Mat`, `MatCreateAIJ()`
5095 @*/
5096 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5097 {
5098   PetscMPIInt size;
5099 
5100   PetscFunctionBegin;
5101   PetscCallMPI(MPI_Comm_size(comm, &size));
5102   if (size == 1) {
5103     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5104     if (scall == MAT_INITIAL_MATRIX) {
5105       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5106     } else {
5107       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5108     }
5109     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5110     PetscFunctionReturn(PETSC_SUCCESS);
5111   }
5112   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5113   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5114   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5115   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5116   PetscFunctionReturn(PETSC_SUCCESS);
5117 }
5118 
5119 /*@
5120      MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix by taking its local rows and putting them into a sequential matrix with
5121           mlocal rows and n columns. Where mlocal is obtained with `MatGetLocalSize()` and n is the global column count obtained
5122           with `MatGetSize()`
5123 
5124     Not Collective
5125 
5126    Input Parameter:
5127 .    A - the matrix
5128 
5129    Output Parameter:
5130 .    A_loc - the local sequential matrix generated
5131 
5132     Level: developer
5133 
5134    Notes:
5135      In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5136 
5137      Destroy the matrix with `MatDestroy()`
5138 
5139 .seealso: [](chapter_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5140 @*/
5141 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5142 {
5143   PetscBool mpi;
5144 
5145   PetscFunctionBegin;
5146   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5147   if (mpi) {
5148     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5149   } else {
5150     *A_loc = A;
5151     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5152   }
5153   PetscFunctionReturn(PETSC_SUCCESS);
5154 }
5155 
5156 /*@
5157      MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5158           mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained
5159           with `MatGetSize()`
5160 
5161     Not Collective
5162 
5163    Input Parameters:
5164 +    A - the matrix
5165 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5166 
5167    Output Parameter:
5168 .    A_loc - the local sequential matrix generated
5169 
5170     Level: developer
5171 
5172    Notes:
5173      In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5174 
5175      When the communicator associated with `A` has size 1 and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A`.
5176      If `MAT_REUSE_MATRIX` is requested with comm size 1, `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called.
5177      This means that one can preallocate the proper sequential matrix first and then call this routine with `MAT_REUSE_MATRIX` to safely
5178      modify the values of the returned `A_loc`.
5179 
5180 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5181 @*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
  const PetscScalar *aa, *ba, *aav, *bav;
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  /* accept MATMPIAIJ and any type whose name begins with it */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* uniprocessor: the diagonal block already is the whole local matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  a  = (Mat_SeqAIJ *)(mpimat->A)->data; /* diagonal block */
  b  = (Mat_SeqAIJ *)(mpimat->B)->data; /* off-diagonal block (compressed local column indices) */
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  /* aa/ba are walked forward through the values; aav/bav keep the originals for the restore calls */
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* row i of the merged matrix holds all entries of row i of A (diag) and of B (off-diag) */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal entries with global column index < cstart come first, keeping columns sorted */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj]; /* map compressed local index to global column */
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A: shift local indices by the column ownership start */
      for (j = 0; j < ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* remaining off-diagonal entries (global column index >= cstart) */
      for (j = jo; j < ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* nonzero structure is unchanged: walk the same interleaving order, refresh values only */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A preceding the diagonal block */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A following the diagonal block */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5286 
5287 /*@
5288      MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5289           mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part
5290 
5291     Not Collective
5292 
5293    Input Parameters:
5294 +    A - the matrix
5295 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5296 
5297    Output Parameters:
5298 +    glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5299 -    A_loc - the local sequential matrix generated
5300 
5301     Level: developer
5302 
5303    Note:
     This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returned matrix are those associated with the diagonal
     part, followed by those associated with the off-diagonal part (in its local ordering)
5306 
5307 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5308 @*/
5309 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5310 {
5311   Mat             Ao, Ad;
5312   const PetscInt *cmap;
5313   PetscMPIInt     size;
5314   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5315 
5316   PetscFunctionBegin;
5317   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5318   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5319   if (size == 1) {
5320     if (scall == MAT_INITIAL_MATRIX) {
5321       PetscCall(PetscObjectReference((PetscObject)Ad));
5322       *A_loc = Ad;
5323     } else if (scall == MAT_REUSE_MATRIX) {
5324       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5325     }
5326     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5327     PetscFunctionReturn(PETSC_SUCCESS);
5328   }
5329   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5330   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5331   if (f) {
5332     PetscCall((*f)(A, scall, glob, A_loc));
5333   } else {
5334     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5335     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5336     Mat_SeqAIJ        *c;
5337     PetscInt          *ai = a->i, *aj = a->j;
5338     PetscInt          *bi = b->i, *bj = b->j;
5339     PetscInt          *ci, *cj;
5340     const PetscScalar *aa, *ba;
5341     PetscScalar       *ca;
5342     PetscInt           i, j, am, dn, on;
5343 
5344     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5345     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5346     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5347     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5348     if (scall == MAT_INITIAL_MATRIX) {
5349       PetscInt k;
5350       PetscCall(PetscMalloc1(1 + am, &ci));
5351       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5352       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5353       ci[0] = 0;
5354       for (i = 0, k = 0; i < am; i++) {
5355         const PetscInt ncols_o = bi[i + 1] - bi[i];
5356         const PetscInt ncols_d = ai[i + 1] - ai[i];
5357         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5358         /* diagonal portion of A */
5359         for (j = 0; j < ncols_d; j++, k++) {
5360           cj[k] = *aj++;
5361           ca[k] = *aa++;
5362         }
5363         /* off-diagonal portion of A */
5364         for (j = 0; j < ncols_o; j++, k++) {
5365           cj[k] = dn + *bj++;
5366           ca[k] = *ba++;
5367         }
5368       }
5369       /* put together the new matrix */
5370       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5371       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5372       /* Since these are PETSc arrays, change flags to free them as necessary. */
5373       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5374       c->free_a  = PETSC_TRUE;
5375       c->free_ij = PETSC_TRUE;
5376       c->nonew   = 0;
5377       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5378     } else if (scall == MAT_REUSE_MATRIX) {
5379       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5380       for (i = 0; i < am; i++) {
5381         const PetscInt ncols_d = ai[i + 1] - ai[i];
5382         const PetscInt ncols_o = bi[i + 1] - bi[i];
5383         /* diagonal portion of A */
5384         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5385         /* off-diagonal portion of A */
5386         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5387       }
5388       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5389     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5390     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5391     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa));
5392     if (glob) {
5393       PetscInt cst, *gidx;
5394 
5395       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5396       PetscCall(PetscMalloc1(dn + on, &gidx));
5397       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5398       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5399       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5400     }
5401   }
5402   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5403   PetscFunctionReturn(PETSC_SUCCESS);
5404 }
5405 
5406 /*@C
5407      MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5408 
5409     Not Collective
5410 
5411    Input Parameters:
5412 +    A - the matrix
5413 .    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5414 .    row - index set of rows to extract (or `NULL`)
5415 -    col - index set of columns to extract (or `NULL`)
5416 
5417    Output Parameter:
5418 .    A_loc - the local sequential matrix generated
5419 
5420     Level: developer
5421 
5422 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5423 @*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS          isrowa, iscola;
  Mat        *aloc;
  PetscBool   match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  if (!row) {
    /* default row set: all locally owned rows */
    start = A->rmap->rstart;
    end   = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* default column set: the nonzero columns of the local part, in sorted global order:
       off-diagonal columns below cstart, then the owned columns, then the rest of garray */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i; /* first garray entry at or beyond the owned column range */
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices() expects an array of matrices when reusing */
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  /* destroy only the index sets this routine created itself */
  if (!row) PetscCall(ISDestroy(&isrowa));
  if (!col) PetscCall(ISDestroy(&iscola));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5476 
5477 /*
 * Create a sequential AIJ matrix based on row indices; a whole column is extracted once a row is matched.
 * Row could be local or remote. The routine is designed to be scalable in memory so that nothing is based
5480  * on a global size.
5481  * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
{
  Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth;
  PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt            owner;
  PetscSFNode           *iremote, *oiremote;
  const PetscInt        *lrowindices;
  PetscSF                sf, osf;
  PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar     *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  /* per-row column counts, interleaved as (diag, off-diag) pairs */
  for (i = 0; i < plocalsize; i++) {
    /* diag */
    nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
    /* off diag */
    nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* compute offsets so that we know the relative location of each row's data */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  /* pnnz[i]: total nonzeros of leaf row i; ncol: max row length, used as column bound below */
  for (i = 0; i < nrows; i++) {
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol    = PetscMax(pnnz[i], ncol);
    /* diag */
    dntotalcols += nlcols[i * 2 + 0];
    /* off diag */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure out the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  /* build entry-level SF graphs: one leaf per nonzero, diag and off-diag handled by separate SFs */
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off diag */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++]      = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* Off diag */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* We operate on the matrix internal data for saving memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for diag matrix; pd->j is mutated in place and restored below */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  /* po->j is also translated in place and mapped back after the broadcasts */
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5650 
5651 /*
5652  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5653  * This supports MPIAIJ and MAIJ
5654  * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp;
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   *  and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      key = a->garray[i] / dof; /* for MAIJ, dof consecutive columns share one key */
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same key as the previous step (garray is sorted) */
        mapping[i] = count - 1;
      }
    }
    /* map: off-diagonal column of A -> row of P_oth */
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
    PetscCall(PetscHMapIGetSize(hamp, &htsize));
    PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
    PetscCall(PetscCalloc1(htsize, &rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    /* hash-map key order is arbitrary; sort to get deterministic row ordering */
    PetscCall(PetscSortInt(htsize, rowindices));
    PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
    /* In case, the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that are attached to the matrix earlier.
     */
    const PetscScalar *pd_a, *po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
    PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
    /* Update values in place */
    PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5727 
5728 /*@C
5729   MatGetBrowsOfAcols - Returns an `IS` containing the rows of `B` that are equal to the nonzero columns of the local `A`
5730 
5731   Collective
5732 
5733   Input Parameters:
5734 + A - the first matrix in `MATMPIAIJ` format
5735 . B - the second matrix in `MATMPIAIJ` format
5736 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5737 
5738   Output Parameters:
5739 + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output
5740 . colb - On input index sets of columns of B to extract (or `NULL`), modified on output
5741 - B_seq - the sequential matrix generated
5742 
5743   Level: developer
5744 
5745 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
5746 @*/
PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
  IS          isrowb, iscolb;
  Mat        *bseq = NULL;

  PetscFunctionBegin;
  /* A's column ownership must match B's row ownership, as required for products of the form A*B */
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));

  if (scall == MAT_INITIAL_MATRIX) {
    /* Build the sorted list of global rows of B needed locally, i.e. the nonzero
       columns of A. a->garray holds the (ascending) global indices of A's
       off-diagonal columns; merge it with the locally owned column block
       [start, start+nzA): garray entries below 'start' first, then the owned
       block, then the remaining garray entries. */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) { /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i; /* first garray entry at or beyond the locally owned block */
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
    /* keep every column of B in the extracted submatrix */
    PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
  } else {
    /* Reuse: the caller must hand back the index sets and the previously created
       sequential matrix so MatCreateSubMatrices can refresh it in place */
    PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb;
    iscolb = *colb;
    PetscCall(PetscMalloc1(1, &bseq));
    bseq[0] = *B_seq;
  }
  PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
  *B_seq = bseq[0];
  PetscCall(PetscFree(bseq)); /* frees only the array of Mat handles, not the matrix itself */
  /* Hand the index sets back to the caller when requested, otherwise destroy them */
  if (!rowb) {
    PetscCall(ISDestroy(&isrowb));
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    PetscCall(ISDestroy(&iscolb));
  } else {
    *colb = iscolb;
  }
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5798 
5799 /*
5800     MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns
5801     of the OFF-DIAGONAL portion of local A
5802 
5803     Collective
5804 
5805    Input Parameters:
5806 +    A,B - the matrices in `MATMPIAIJ` format
5807 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5808 
5809    Output Parameters:
5810 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5811 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5812 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5813 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5814 
5815     Developer Note:
5816     This directly accesses information inside the VecScatter associated with the matrix-vector product
5817      for this matrix. This is not desirable.
5818 
5819     Level: developer
5820 
5821 */
5822 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5823 {
5824   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5825   Mat_SeqAIJ        *b_oth;
5826   VecScatter         ctx;
5827   MPI_Comm           comm;
5828   const PetscMPIInt *rprocs, *sprocs;
5829   const PetscInt    *srow, *rstarts, *sstarts;
5830   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5831   PetscInt           i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len;
5832   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5833   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5834   PetscMPIInt        size, tag, rank, nreqs;
5835 
5836   PetscFunctionBegin;
5837   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5838   PetscCallMPI(MPI_Comm_size(comm, &size));
5839 
5840   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5841              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5842   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5843   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5844 
5845   if (size == 1) {
5846     startsj_s = NULL;
5847     bufa_ptr  = NULL;
5848     *B_oth    = NULL;
5849     PetscFunctionReturn(PETSC_SUCCESS);
5850   }
5851 
5852   ctx = a->Mvctx;
5853   tag = ((PetscObject)ctx)->tag;
5854 
5855   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5856   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5857   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5858   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5859   PetscCall(PetscMalloc1(nreqs, &reqs));
5860   rwaits = reqs;
5861   swaits = reqs + nrecvs;
5862 
5863   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5864   if (scall == MAT_INITIAL_MATRIX) {
5865     /* i-array */
5866     /*  post receives */
5867     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5868     for (i = 0; i < nrecvs; i++) {
5869       rowlen = rvalues + rstarts[i] * rbs;
5870       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5871       PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5872     }
5873 
5874     /* pack the outgoing message */
5875     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5876 
5877     sstartsj[0] = 0;
5878     rstartsj[0] = 0;
5879     len         = 0; /* total length of j or a array to be sent */
5880     if (nsends) {
5881       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5882       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5883     }
5884     for (i = 0; i < nsends; i++) {
5885       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5886       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5887       for (j = 0; j < nrows; j++) {
5888         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5889         for (l = 0; l < sbs; l++) {
5890           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5891 
5892           rowlen[j * sbs + l] = ncols;
5893 
5894           len += ncols;
5895           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5896         }
5897         k++;
5898       }
5899       PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5900 
5901       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5902     }
5903     /* recvs and sends of i-array are completed */
5904     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5905     PetscCall(PetscFree(svalues));
5906 
5907     /* allocate buffers for sending j and a arrays */
5908     PetscCall(PetscMalloc1(len + 1, &bufj));
5909     PetscCall(PetscMalloc1(len + 1, &bufa));
5910 
5911     /* create i-array of B_oth */
5912     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5913 
5914     b_othi[0] = 0;
5915     len       = 0; /* total length of j or a array to be received */
5916     k         = 0;
5917     for (i = 0; i < nrecvs; i++) {
5918       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5919       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5920       for (j = 0; j < nrows; j++) {
5921         b_othi[k + 1] = b_othi[k] + rowlen[j];
5922         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5923         k++;
5924       }
5925       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5926     }
5927     PetscCall(PetscFree(rvalues));
5928 
5929     /* allocate space for j and a arrays of B_oth */
5930     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5931     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5932 
5933     /* j-array */
5934     /*  post receives of j-array */
5935     for (i = 0; i < nrecvs; i++) {
5936       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5937       PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5938     }
5939 
5940     /* pack the outgoing message j-array */
5941     if (nsends) k = sstarts[0];
5942     for (i = 0; i < nsends; i++) {
5943       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5944       bufJ  = bufj + sstartsj[i];
5945       for (j = 0; j < nrows; j++) {
5946         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5947         for (ll = 0; ll < sbs; ll++) {
5948           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5949           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5950           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5951         }
5952       }
5953       PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5954     }
5955 
5956     /* recvs and sends of j-array are completed */
5957     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5958   } else if (scall == MAT_REUSE_MATRIX) {
5959     sstartsj = *startsj_s;
5960     rstartsj = *startsj_r;
5961     bufa     = *bufa_ptr;
5962     b_oth    = (Mat_SeqAIJ *)(*B_oth)->data;
5963     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5964   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5965 
5966   /* a-array */
5967   /*  post receives of a-array */
5968   for (i = 0; i < nrecvs; i++) {
5969     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5970     PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
5971   }
5972 
5973   /* pack the outgoing message a-array */
5974   if (nsends) k = sstarts[0];
5975   for (i = 0; i < nsends; i++) {
5976     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5977     bufA  = bufa + sstartsj[i];
5978     for (j = 0; j < nrows; j++) {
5979       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5980       for (ll = 0; ll < sbs; ll++) {
5981         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5982         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
5983         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5984       }
5985     }
5986     PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
5987   }
5988   /* recvs and sends of a-array are completed */
5989   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5990   PetscCall(PetscFree(reqs));
5991 
5992   if (scall == MAT_INITIAL_MATRIX) {
5993     /* put together the new matrix */
5994     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
5995 
5996     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5997     /* Since these are PETSc arrays, change flags to free them as necessary. */
5998     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5999     b_oth->free_a  = PETSC_TRUE;
6000     b_oth->free_ij = PETSC_TRUE;
6001     b_oth->nonew   = 0;
6002 
6003     PetscCall(PetscFree(bufj));
6004     if (!startsj_s || !bufa_ptr) {
6005       PetscCall(PetscFree2(sstartsj, rstartsj));
6006       PetscCall(PetscFree(bufa_ptr));
6007     } else {
6008       *startsj_s = sstartsj;
6009       *startsj_r = rstartsj;
6010       *bufa_ptr  = bufa;
6011     }
6012   } else if (scall == MAT_REUSE_MATRIX) {
6013     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
6014   }
6015 
6016   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
6017   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
6018   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
6019   PetscFunctionReturn(PETSC_SUCCESS);
6020 }
6021 
6022 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
6023 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
6024 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
6025 #if defined(PETSC_HAVE_MKL_SPARSE)
6026 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
6027 #endif
6028 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
6029 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
6030 #if defined(PETSC_HAVE_ELEMENTAL)
6031 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
6032 #endif
6033 #if defined(PETSC_HAVE_SCALAPACK)
6034 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6035 #endif
6036 #if defined(PETSC_HAVE_HYPRE)
6037 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
6038 #endif
6039 #if defined(PETSC_HAVE_CUDA)
6040 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
6041 #endif
6042 #if defined(PETSC_HAVE_HIP)
6043 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
6044 #endif
6045 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6046 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6047 #endif
6048 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6049 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6050 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6051 
6052 /*
6053     Computes (B'*A')' since computing B*A directly is untenable
6054 
6055                n                       p                          p
6056         [             ]       [             ]         [                 ]
6057       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6058         [             ]       [             ]         [                 ]
6059 
6060 */
6061 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6062 {
6063   Mat At, Bt, Ct;
6064 
6065   PetscFunctionBegin;
6066   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6067   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6068   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
6069   PetscCall(MatDestroy(&At));
6070   PetscCall(MatDestroy(&Bt));
6071   PetscCall(MatTransposeSetPrecursor(Ct, C));
6072   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6073   PetscCall(MatDestroy(&Ct));
6074   PetscFunctionReturn(PETSC_SUCCESS);
6075 }
6076 
6077 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6078 {
6079   PetscBool cisdense;
6080 
6081   PetscFunctionBegin;
6082   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6083   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6084   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6085   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6086   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6087   PetscCall(MatSetUp(C));
6088 
6089   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6090   PetscFunctionReturn(PETSC_SUCCESS);
6091 }
6092 
6093 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6094 {
6095   Mat_Product *product = C->product;
6096   Mat          A = product->A, B = product->B;
6097 
6098   PetscFunctionBegin;
6099   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
6100              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6101   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6102   C->ops->productsymbolic = MatProductSymbolic_AB;
6103   PetscFunctionReturn(PETSC_SUCCESS);
6104 }
6105 
6106 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6107 {
6108   Mat_Product *product = C->product;
6109 
6110   PetscFunctionBegin;
6111   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6112   PetscFunctionReturn(PETSC_SUCCESS);
6113 }
6114 
6115 /*
6116    Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6117 
6118   Input Parameters:
6119 
6120     j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
6121     j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)
6122 
6123     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6124 
6125     For Set1, j1[] contains column indices of the nonzeros.
6126     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
6127     respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6128     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6129 
6130     Similar for Set2.
6131 
6132     This routine merges the two sets of nonzeros row by row and removes repeats.
6133 
6134   Output Parameters: (memory is allocated by the caller)
6135 
6136     i[],j[]: the CSR of the merged matrix, which has m rows.
6137     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6138     imap2[]: similar to imap1[], but for Set2.
6139     Note we order nonzeros row-by-row and from left to right.
6140 */
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m; /* Row index of mat, and its number of local rows */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Count of unique nonzeros in Set1, Set2 and the merged matrix, respectively */
  i[0]        = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    /* Classic two-pointer merge over the sorted (possibly repeated) column
       indices of row r in each set; repeats are skipped via jmap1/jmap2 */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) {
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else {
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] (at most one loop runs) */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    i[r + 1] = t; /* CSR row pointer of the merged matrix */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
6198 
6199 /*
6200   Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6201 
6202   Input Parameters:
6203     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6204     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6205       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6206 
6207       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6208       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6209 
6210   Output Parameters:
6211     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6212     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6213       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6214       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6215 
6216     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6217       Atot: number of entries belonging to the diagonal block.
6218       Annz: number of unique nonzeros belonging to the diagonal block.
6219       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6220         repeats (i.e., same 'i,j' pair).
6221       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6222         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6223 
6224       Atot: number of entries belonging to the diagonal block
6225       Annz: number of unique nonzeros belonging to the diagonal block.
6226 
6227     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6228 
6229     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6230 */
6231 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
6232 {
6233   PetscInt    cstart, cend, rstart, rend, row, col;
6234   PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6235   PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6236   PetscCount  k, m, p, q, r, s, mid;
6237   PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;
6238 
6239   PetscFunctionBegin;
6240   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6241   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6242   m = rend - rstart;
6243 
6244   for (k = 0; k < n; k++) {
6245     if (i[k] >= 0) break;
6246   } /* Skip negative rows */
6247 
6248   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6249      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6250   */
6251   while (k < n) {
6252     row = i[k];
6253     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6254     for (s = k; s < n; s++)
6255       if (i[s] != row) break;
6256     for (p = k; p < s; p++) {
6257       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1]  */
6258       else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
6259     }
6260     PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
6261     PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6262     rowBegin[row - rstart] = k;
6263     rowMid[row - rstart]   = mid;
6264     rowEnd[row - rstart]   = s;
6265 
6266     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6267     Atot += mid - k;
6268     Btot += s - mid;
6269 
6270     /* Count unique nonzeros of this diag/offdiag row */
6271     for (p = k; p < mid;) {
6272       col = j[p];
6273       do {
6274         j[p] += PETSC_MAX_INT;
6275         p++;
6276       } while (p < mid && j[p] == col); /* Revert the modified diagonal indices */
6277       Annz++;
6278     }
6279 
6280     for (p = mid; p < s;) {
6281       col = j[p];
6282       do {
6283         p++;
6284       } while (p < s && j[p] == col);
6285       Bnnz++;
6286     }
6287     k = s;
6288   }
6289 
6290   /* Allocation according to Atot, Btot, Annz, Bnnz */
6291   PetscCall(PetscMalloc1(Atot, &Aperm));
6292   PetscCall(PetscMalloc1(Btot, &Bperm));
6293   PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
6294   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));
6295 
6296   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6297   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6298   for (r = 0; r < m; r++) {
6299     k   = rowBegin[r];
6300     mid = rowMid[r];
6301     s   = rowEnd[r];
6302     PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k));
6303     PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid));
6304     Atot += mid - k;
6305     Btot += s - mid;
6306 
6307     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6308     for (p = k; p < mid;) {
6309       col = j[p];
6310       q   = p;
6311       do {
6312         p++;
6313       } while (p < mid && j[p] == col);
6314       Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
6315       Annz++;
6316     }
6317 
6318     for (p = mid; p < s;) {
6319       col = j[p];
6320       q   = p;
6321       do {
6322         p++;
6323       } while (p < s && j[p] == col);
6324       Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
6325       Bnnz++;
6326     }
6327   }
6328   /* Output */
6329   *Aperm_ = Aperm;
6330   *Annz_  = Annz;
6331   *Atot_  = Atot;
6332   *Ajmap_ = Ajmap;
6333   *Bperm_ = Bperm;
6334   *Bnnz_  = Bnnz;
6335   *Btot_  = Btot;
6336   *Bjmap_ = Bjmap;
6337   PetscFunctionReturn(PETSC_SUCCESS);
6338 }
6339 
6340 /*
6341   Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6342 
6343   Input Parameters:
6344     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6345     nnz:  number of unique nonzeros in the merged matrix
6346     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6347     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6348 
6349   Output Parameter: (memory is allocated by the caller)
6350     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6351 
6352   Example:
6353     nnz1 = 4
6354     nnz  = 6
6355     imap = [1,3,4,5]
6356     jmap = [0,3,5,6,7]
6357    then,
6358     jmap_new = [0,0,3,3,5,6,7]
6359 */
6360 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6361 {
6362   PetscCount k, p;
6363 
6364   PetscFunctionBegin;
6365   jmap_new[0] = 0;
6366   p           = nnz;                /* p loops over jmap_new[] backwards */
6367   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6368     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6369   }
6370   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6371   PetscFunctionReturn(PETSC_SUCCESS);
6372 }
6373 
6374 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data)
6375 {
6376   MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data;
6377 
6378   PetscFunctionBegin;
6379   PetscCall(PetscSFDestroy(&coo->sf));
6380   PetscCall(PetscFree(coo->Aperm1));
6381   PetscCall(PetscFree(coo->Bperm1));
6382   PetscCall(PetscFree(coo->Ajmap1));
6383   PetscCall(PetscFree(coo->Bjmap1));
6384   PetscCall(PetscFree(coo->Aimap2));
6385   PetscCall(PetscFree(coo->Bimap2));
6386   PetscCall(PetscFree(coo->Aperm2));
6387   PetscCall(PetscFree(coo->Bperm2));
6388   PetscCall(PetscFree(coo->Ajmap2));
6389   PetscCall(PetscFree(coo->Bjmap2));
6390   PetscCall(PetscFree(coo->Cperm1));
6391   PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
6392   PetscCall(PetscFree(coo));
6393   PetscFunctionReturn(PETSC_SUCCESS);
6394 }
6395 
6396 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6397 {
6398   MPI_Comm             comm;
6399   PetscMPIInt          rank, size;
6400   PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6401   PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
6402   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6403   PetscContainer       container;
6404   MatCOOStruct_MPIAIJ *coo;
6405 
6406   PetscFunctionBegin;
6407   PetscCall(PetscFree(mpiaij->garray));
6408   PetscCall(VecDestroy(&mpiaij->lvec));
6409 #if defined(PETSC_USE_CTABLE)
6410   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6411 #else
6412   PetscCall(PetscFree(mpiaij->colmap));
6413 #endif
6414   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6415   mat->assembled     = PETSC_FALSE;
6416   mat->was_assembled = PETSC_FALSE;
6417 
6418   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6419   PetscCallMPI(MPI_Comm_size(comm, &size));
6420   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6421   PetscCall(PetscLayoutSetUp(mat->rmap));
6422   PetscCall(PetscLayoutSetUp(mat->cmap));
6423   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6424   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6425   PetscCall(MatGetLocalSize(mat, &m, &n));
6426   PetscCall(MatGetSize(mat, &M, &N));
6427 
6428   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6429   /* entries come first, then local rows, then remote rows.                     */
6430   PetscCount n1 = coo_n, *perm1;
6431   PetscInt  *i1 = coo_i, *j1 = coo_j;
6432 
6433   PetscCall(PetscMalloc1(n1, &perm1));
6434   for (k = 0; k < n1; k++) perm1[k] = k;
6435 
6436   /* Manipulate indices so that entries with negative row or col indices will have smallest
6437      row indices, local entries will have greater but negative row indices, and remote entries
6438      will have positive row indices.
6439   */
6440   for (k = 0; k < n1; k++) {
6441     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT;                /* e.g., -2^31, minimal to move them ahead */
6442     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6443     else {
6444       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6445       if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6446     }
6447   }
6448 
6449   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6450   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6451   for (k = 0; k < n1; k++) {
6452     if (i1[k] > PETSC_MIN_INT) break;
6453   }                                                                               /* Advance k to the first entry we need to take care of */
6454   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */
6455   for (; k < rem; k++) i1[k] += PETSC_MAX_INT;                                    /* Revert row indices of local rows*/
6456 
6457   /*           Split local rows into diag/offdiag portions                      */
6458   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6459   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1, *Cperm1;
6460   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6461 
6462   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6463   PetscCall(PetscMalloc1(n1 - rem, &Cperm1));
6464   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6465 
6466   /*           Send remote rows to their owner                                  */
6467   /* Find which rows should be sent to which remote ranks*/
6468   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6469   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6470   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6471   const PetscInt *ranges;
6472   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6473 
6474   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6475   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6476   for (k = rem; k < n1;) {
6477     PetscMPIInt owner;
6478     PetscInt    firstRow, lastRow;
6479 
6480     /* Locate a row range */
6481     firstRow = i1[k]; /* first row of this owner */
6482     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6483     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6484 
6485     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6486     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6487 
6488     /* All entries in [k,p) belong to this remote owner */
6489     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6490       PetscMPIInt *sendto2;
6491       PetscInt    *nentries2;
6492       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6493 
6494       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6495       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6496       PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1));
6497       PetscCall(PetscFree2(sendto, nentries2));
6498       sendto   = sendto2;
6499       nentries = nentries2;
6500       maxNsend = maxNsend2;
6501     }
6502     sendto[nsend]   = owner;
6503     nentries[nsend] = p - k;
6504     PetscCall(PetscCountCast(p - k, &nentries[nsend]));
6505     nsend++;
6506     k = p;
6507   }
6508 
6509   /* Build 1st SF to know offsets on remote to send data */
6510   PetscSF      sf1;
6511   PetscInt     nroots = 1, nroots2 = 0;
6512   PetscInt     nleaves = nsend, nleaves2 = 0;
6513   PetscInt    *offsets;
6514   PetscSFNode *iremote;
6515 
6516   PetscCall(PetscSFCreate(comm, &sf1));
6517   PetscCall(PetscMalloc1(nsend, &iremote));
6518   PetscCall(PetscMalloc1(nsend, &offsets));
6519   for (k = 0; k < nsend; k++) {
6520     iremote[k].rank  = sendto[k];
6521     iremote[k].index = 0;
6522     nleaves2 += nentries[k];
6523     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6524   }
6525   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6526   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6527   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6528   PetscCall(PetscSFDestroy(&sf1));
6529   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem);
6530 
6531   /* Build 2nd SF to send remote COOs to their owner */
6532   PetscSF sf2;
6533   nroots  = nroots2;
6534   nleaves = nleaves2;
6535   PetscCall(PetscSFCreate(comm, &sf2));
6536   PetscCall(PetscSFSetFromOptions(sf2));
6537   PetscCall(PetscMalloc1(nleaves, &iremote));
6538   p = 0;
6539   for (k = 0; k < nsend; k++) {
6540     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6541     for (q = 0; q < nentries[k]; q++, p++) {
6542       iremote[p].rank  = sendto[k];
6543       iremote[p].index = offsets[k] + q;
6544     }
6545   }
6546   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6547 
6548   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6549   PetscCall(PetscArraycpy(Cperm1, perm1 + rem, n1 - rem));
6550 
6551   /* Send the remote COOs to their owner */
6552   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6553   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6554   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6555   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6556   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE));
6557   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6558   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE));
6559 
6560   PetscCall(PetscFree(offsets));
6561   PetscCall(PetscFree2(sendto, nentries));
6562 
6563   /* Sort received COOs by row along with the permutation array     */
6564   for (k = 0; k < n2; k++) perm2[k] = k;
6565   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6566 
6567   /* Split received COOs into diag/offdiag portions                 */
6568   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6569   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6570   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6571 
6572   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6573   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6574 
6575   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6576   PetscInt *Ai, *Bi;
6577   PetscInt *Aj, *Bj;
6578 
6579   PetscCall(PetscMalloc1(m + 1, &Ai));
6580   PetscCall(PetscMalloc1(m + 1, &Bi));
6581   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6582   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6583 
6584   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6585   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6586   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6587   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6588   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6589 
6590   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6591   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6592 
6593   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6594   /* expect nonzeros in A/B most likely have local contributing entries        */
6595   PetscInt    Annz = Ai[m];
6596   PetscInt    Bnnz = Bi[m];
6597   PetscCount *Ajmap1_new, *Bjmap1_new;
6598 
6599   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6600   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6601 
6602   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6603   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6604 
6605   PetscCall(PetscFree(Aimap1));
6606   PetscCall(PetscFree(Ajmap1));
6607   PetscCall(PetscFree(Bimap1));
6608   PetscCall(PetscFree(Bjmap1));
6609   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6610   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6611   PetscCall(PetscFree(perm1));
6612   PetscCall(PetscFree3(i2, j2, perm2));
6613 
6614   Ajmap1 = Ajmap1_new;
6615   Bjmap1 = Bjmap1_new;
6616 
6617   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6618   if (Annz < Annz1 + Annz2) {
6619     PetscInt *Aj_new;
6620     PetscCall(PetscMalloc1(Annz, &Aj_new));
6621     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6622     PetscCall(PetscFree(Aj));
6623     Aj = Aj_new;
6624   }
6625 
6626   if (Bnnz < Bnnz1 + Bnnz2) {
6627     PetscInt *Bj_new;
6628     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6629     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6630     PetscCall(PetscFree(Bj));
6631     Bj = Bj_new;
6632   }
6633 
6634   /* Create new submatrices for on-process and off-process coupling                  */
6635   PetscScalar *Aa, *Ba;
6636   MatType      rtype;
6637   Mat_SeqAIJ  *a, *b;
6638   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6639   PetscCall(PetscCalloc1(Bnnz, &Ba));
6640   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6641   if (cstart) {
6642     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6643   }
6644   PetscCall(MatDestroy(&mpiaij->A));
6645   PetscCall(MatDestroy(&mpiaij->B));
6646   PetscCall(MatGetRootType_Private(mat, &rtype));
6647   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6648   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6649   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6650 
6651   a               = (Mat_SeqAIJ *)mpiaij->A->data;
6652   b               = (Mat_SeqAIJ *)mpiaij->B->data;
6653   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6654   a->free_a = b->free_a = PETSC_TRUE;
6655   a->free_ij = b->free_ij = PETSC_TRUE;
6656 
6657   /* conversion must happen AFTER multiply setup */
6658   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6659   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6660   PetscCall(VecDestroy(&mpiaij->lvec));
6661   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6662 
6663   // Put the COO struct in a container and then attach that to the matrix
6664   PetscCall(PetscMalloc1(1, &coo));
6665   coo->n       = coo_n;
6666   coo->sf      = sf2;
6667   coo->sendlen = nleaves;
6668   coo->recvlen = nroots;
6669   coo->Annz    = Annz;
6670   coo->Bnnz    = Bnnz;
6671   coo->Annz2   = Annz2;
6672   coo->Bnnz2   = Bnnz2;
6673   coo->Atot1   = Atot1;
6674   coo->Atot2   = Atot2;
6675   coo->Btot1   = Btot1;
6676   coo->Btot2   = Btot2;
6677   coo->Ajmap1  = Ajmap1;
6678   coo->Aperm1  = Aperm1;
6679   coo->Bjmap1  = Bjmap1;
6680   coo->Bperm1  = Bperm1;
6681   coo->Aimap2  = Aimap2;
6682   coo->Ajmap2  = Ajmap2;
6683   coo->Aperm2  = Aperm2;
6684   coo->Bimap2  = Bimap2;
6685   coo->Bjmap2  = Bjmap2;
6686   coo->Bperm2  = Bperm2;
6687   coo->Cperm1  = Cperm1;
6688   // Allocate in preallocation. If not used, it has zero cost on host
6689   PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
6690   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
6691   PetscCall(PetscContainerSetPointer(container, coo));
6692   PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ));
6693   PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
6694   PetscCall(PetscContainerDestroy(&container));
6695   PetscFunctionReturn(PETSC_SUCCESS);
6696 }
6697 
6698 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6699 {
6700   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6701   Mat                  A = mpiaij->A, B = mpiaij->B;
6702   PetscScalar         *Aa, *Ba;
6703   PetscScalar         *sendbuf, *recvbuf;
6704   const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
6705   const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
6706   const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
6707   const PetscCount    *Cperm1;
6708   PetscContainer       container;
6709   MatCOOStruct_MPIAIJ *coo;
6710 
6711   PetscFunctionBegin;
6712   PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
6713   PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix");
6714   PetscCall(PetscContainerGetPointer(container, (void **)&coo));
6715   sendbuf = coo->sendbuf;
6716   recvbuf = coo->recvbuf;
6717   Ajmap1  = coo->Ajmap1;
6718   Ajmap2  = coo->Ajmap2;
6719   Aimap2  = coo->Aimap2;
6720   Bjmap1  = coo->Bjmap1;
6721   Bjmap2  = coo->Bjmap2;
6722   Bimap2  = coo->Bimap2;
6723   Aperm1  = coo->Aperm1;
6724   Aperm2  = coo->Aperm2;
6725   Bperm1  = coo->Bperm1;
6726   Bperm2  = coo->Bperm2;
6727   Cperm1  = coo->Cperm1;
6728 
6729   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6730   PetscCall(MatSeqAIJGetArray(B, &Ba));
6731 
6732   /* Pack entries to be sent to remote */
6733   for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6734 
6735   /* Send remote entries to their owner and overlap the communication with local computation */
6736   PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6737   /* Add local entries to A and B */
6738   for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6739     PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
6740     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6741     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6742   }
6743   for (PetscCount i = 0; i < coo->Bnnz; i++) {
6744     PetscScalar sum = 0.0;
6745     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6746     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6747   }
6748   PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6749 
6750   /* Add received remote entries to A and B */
6751   for (PetscCount i = 0; i < coo->Annz2; i++) {
6752     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6753   }
6754   for (PetscCount i = 0; i < coo->Bnnz2; i++) {
6755     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6756   }
6757   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6758   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6759   PetscFunctionReturn(PETSC_SUCCESS);
6760 }
6761 
6762 /*MC
6763    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6764 
6765    Options Database Keys:
6766 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6767 
6768    Level: beginner
6769 
6770    Notes:
6771    `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values,
6772     in this case the values associated with the rows and columns one passes in are set to zero
6773     in the matrix
6774 
    `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
6776     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
6777 
6778 .seealso: [](chapter_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6779 M*/
/* MatCreate_MPIAIJ - type constructor for MATMPIAIJ, invoked by MatSetType().
   Allocates the Mat_MPIAIJ context, installs the operations table, creates the
   stash used to buffer off-process MatSetValues() entries, and registers all
   type-specific implementations (preallocation, COO assembly, conversions to
   other types) via PetscObjectComposeFunction(). */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  PetscCall(PetscNew(&b));
  B->data = (void *)b;
  PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* Register implementations queried by the generic Mat layer by string name */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  /* Conversions to other types; each guarded entry is compiled only when the
     corresponding package was enabled in PETSc's configuration */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(PETSC_SUCCESS);
}
6862 
6863 /*@C
6864      MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6865          and "off-diagonal" part of the matrix in CSR format.
6866 
6867    Collective
6868 
6869    Input Parameters:
6870 +  comm - MPI communicator
6871 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
6872 .  n - This value should be the same as the local size used in creating the
6873        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
6874        calculated if `N` is given) For square matrices `n` is almost always `m`.
6875 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
6876 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
6877 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6878 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
6879 .   a - matrix values
6880 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6881 .   oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
6882 -   oa - matrix values
6883 
6884    Output Parameter:
6885 .   mat - the matrix
6886 
6887    Level: advanced
6888 
6889    Notes:
6890        The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6891        must free the arrays once the matrix has been destroyed and not before.
6892 
6893        The `i` and `j` indices are 0 based
6894 
6895        See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix
6896 
6897        This sets local rows and cannot be used to set off-processor values.
6898 
6899        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6900        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6901        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6902        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6903        keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6904        communication if it is known that only local entries will be set.
6905 
6906 .seealso: [](chapter_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6907           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6908 @*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
{
  Mat_MPIAIJ *maij;

  PetscFunctionBegin;
  /* Basic sanity checks: the local row count must be explicit, and both CSR row arrays must be 0-based */
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  /* The user's arrays ARE the storage, so no preallocation pass is needed */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Wrap (not copy) the user's CSR arrays as the diagonal (A) and off-diagonal (B) blocks;
     the caller retains ownership and must keep them alive until the matrix is destroyed */
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));

  /* All entries are local by construction, so skip off-process communication during assembly,
     then restore the option so later MatSetValues() calls behave normally */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
6937 
/* Product context stored in C->product->data for backend (GPU-capable) MPIAIJ
   matrix-matrix products; it keeps the per-process intermediate products and the
   machinery (SF, COO buffers) used to assemble their entries into the result C.
   Freed by MatDestroy_MatMatMPIAIJBACKEND(). */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt   cp;    /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r;
  PetscScalar *bufa;
  Mat          P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;            /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;

  /* customization */
  PetscBool abmerge;
  PetscBool P_oth_bind;
} MatMatMPIAIJBACKEND;
6968 
6969 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6970 {
6971   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
6972   PetscInt             i;
6973 
6974   PetscFunctionBegin;
6975   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
6976   PetscCall(PetscFree(mmdata->bufa));
6977   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
6978   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
6979   PetscCall(MatDestroy(&mmdata->P_oth));
6980   PetscCall(MatDestroy(&mmdata->Bloc));
6981   PetscCall(PetscSFDestroy(&mmdata->sf));
6982   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
6983   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
6984   PetscCall(PetscFree(mmdata->own[0]));
6985   PetscCall(PetscFree(mmdata->own));
6986   PetscCall(PetscFree(mmdata->off[0]));
6987   PetscCall(PetscFree(mmdata->off));
6988   PetscCall(PetscFree(mmdata));
6989   PetscFunctionReturn(PETSC_SUCCESS);
6990 }
6991 
6992 /* Copy selected n entries with indices in idx[] of A to v[].
6993    If idx is NULL, copy the whole data array of A to v[]
6994  */
6995 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6996 {
6997   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
6998 
6999   PetscFunctionBegin;
7000   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7001   if (f) {
7002     PetscCall((*f)(A, n, idx, v));
7003   } else {
7004     const PetscScalar *vv;
7005 
7006     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7007     if (n && idx) {
7008       PetscScalar    *w  = v;
7009       const PetscInt *oi = idx;
7010       PetscInt        j;
7011 
7012       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7013     } else {
7014       PetscCall(PetscArraycpy(v, vv, n));
7015     }
7016     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7017   }
7018   PetscFunctionReturn(PETSC_SUCCESS);
7019 }
7020 
7021 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
7022 {
7023   MatMatMPIAIJBACKEND *mmdata;
7024   PetscInt             i, n_d, n_o;
7025 
7026   PetscFunctionBegin;
7027   MatCheckProduct(C, 1);
7028   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
7029   mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
7030   if (!mmdata->reusesym) { /* update temporary matrices */
7031     if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7032     if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
7033   }
7034   mmdata->reusesym = PETSC_FALSE;
7035 
7036   for (i = 0; i < mmdata->cp; i++) {
7037     PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
7038     PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
7039   }
7040   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
7041     PetscInt noff = mmdata->off[i + 1] - mmdata->off[i];
7042 
7043     if (mmdata->mptmp[i]) continue;
7044     if (noff) {
7045       PetscInt nown = mmdata->own[i + 1] - mmdata->own[i];
7046 
7047       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
7048       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
7049       n_o += noff;
7050       n_d += nown;
7051     } else {
7052       Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;
7053 
7054       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
7055       n_d += mm->nz;
7056     }
7057   }
7058   if (mmdata->hasoffproc) { /* offprocess insertion */
7059     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7060     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7061   }
7062   PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
7063   PetscFunctionReturn(PETSC_SUCCESS);
7064 }
7065 
7066 /* Support for Pt * A, A * P, or Pt * A * P */
7067 #define MAX_NUMBER_INTERMEDIATE 4
/*
  Symbolic phase for MATPRODUCT_AB, MATPRODUCT_AtB and MATPRODUCT_PtAP.

  The parallel product is decomposed into up to MAX_NUMBER_INTERMEDIATE local
  (sequential) products mp[]; their nonzero locations are translated to global
  (i,j) coordinates through the per-product row/column maps rmapt/rmapa and
  cmapt/cmapa, and C is preallocated with MatSetPreallocationCOO(). Entries
  that belong to other ranks are routed through a PetscSF, so the same code
  path serves host and device (CUDA/HIP/Kokkos) matrix types.
*/
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product           *product = C->product;
  Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ            *a, *p;
  MatMatMPIAIJBACKEND   *mmdata;
  ISLocalToGlobalMapping P_oth_l2g = NULL;
  IS                     glob      = NULL;
  const char            *prefix;
  char                   pprefix[256];
  const PetscInt        *globidx, *P_oth_idx;
  PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
  PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
  PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
                                                                                         /* a base offset; type-2: sparse with a local to global map table */
  const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */

  MatProductType ptype;
  PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
  PetscMPIInt    size;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  ptype = product->type;
  /* treat A^t*B as A*B when A is symmetric, and record that this shortcut was taken */
  if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
    ptype                                          = MATPRODUCT_AB;
    product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
  }
  /* determine C's sizes and whether product values may need to be sent to other ranks */
  switch (ptype) {
  case MATPRODUCT_AB:
    A          = product->A;
    P          = product->B;
    m          = A->rmap->n;
    n          = P->cmap->n;
    M          = A->rmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
    break;
  case MATPRODUCT_AtB:
    P          = product->A;
    A          = product->B;
    m          = P->cmap->n;
    n          = A->cmap->n;
    M          = P->cmap->N;
    N          = A->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  case MATPRODUCT_PtAP:
    A          = product->A;
    P          = product->B;
    m          = P->cmap->n;
    n          = P->cmap->n;
    M          = P->cmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
  if (size == 1) hasoffproc = PETSC_FALSE; /* uniprocessor: every product value is local */

  /* defaults */
  for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
    mp[i]    = NULL;
    mptmp[i] = PETSC_FALSE;
    rmapt[i] = -1;
    cmapt[i] = -1;
    rmapa[i] = NULL;
    cmapa[i] = NULL;
  }

  /* customization */
  PetscCall(PetscNew(&mmdata));
  /* with the user-facing API (e.g. MatMatMult) the numeric phase follows immediately,
     so the temporary matrices built here can be reused once without refresh */
  mmdata->reusesym = product->api_user;
  if (ptype == MATPRODUCT_AB) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
      PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  } else if (ptype == MATPRODUCT_PtAP) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
      PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  }
  a = (Mat_MPIAIJ *)A->data;
  p = (Mat_MPIAIJ *)P->data;
  PetscCall(MatSetSizes(C, m, n, M, N));
  PetscCall(PetscLayoutSetUp(C->rmap));
  PetscCall(PetscLayoutSetUp(C->cmap));
  PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatGetOptionsPrefix(C, &prefix));

  /* build the list of intermediate local products mp[0..cp) */
  cp = 0;
  switch (ptype) {
  case MATPRODUCT_AB: /* A * P */
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));

    /* A_diag * P_local (merged or not) */
    if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
      /* P is product->B */
      PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
      PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1; /* rows are C's owned rows, shifted by rstart */
      cmapt[cp] = 2; /* columns of the merged matrix are mapped through glob */
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else { /* A_diag * P_diag and A_diag * P_off */
      PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 1;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = p->garray; /* P's off-diagonal columns are numbered by p->garray */
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }

    /* A_off * P_other */
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;

  case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
    /* A is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
      PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else {
      PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = p->garray; /* P_off^t contributes rows indexed by P's off-diagonal column map */
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  case MATPRODUCT_PtAP:
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    /* P is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
    PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
    PetscCall(MatProductSetFill(mp[cp], product->fill));
    PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
    PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
    PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
    mp[cp]->product->api_user = product->api_user;
    PetscCall(MatProductSetFromOptions(mp[cp]));
    PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
    PetscCall(ISGetIndices(glob, &globidx));
    rmapt[cp] = 2;
    rmapa[cp] = globidx;
    cmapt[cp] = 2;
    cmapa[cp] = globidx;
    mptmp[cp] = PETSC_FALSE;
    cp++;
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      /* A_off * P_oth, kept only as input to the next product */
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      mptmp[cp] = PETSC_TRUE; /* temporary: its values never enter C directly */
      cp++;
      /* Bloc^t * (A_off * P_oth), reusing the temporary product mp[1] created just above */
      PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  /* sanity check */
  if (size > 1)
    for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);

  PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
  for (i = 0; i < cp; i++) {
    mmdata->mp[i]    = mp[i];
    mmdata->mptmp[i] = mptmp[i];
  }
  mmdata->cp             = cp;
  C->product->data       = mmdata;
  C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
  C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;

  /* memory type */
  mmdata->mtype = PETSC_MEMTYPE_HOST;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
  if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
  else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
  else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;

  /* prepare coo coordinates for values insertion */

  /* count total nonzeros of those intermediate seqaij Mats
    ncoo_d:    # of nonzeros of matrices that do not have offproc entries
    ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
    ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
  */
  for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
    if (mptmp[cp]) continue;
    if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
      const PetscInt *rmap = rmapa[cp];
      const PetscInt  mr   = mp[cp]->rmap->n;
      const PetscInt  rs   = C->rmap->rstart;
      const PetscInt  re   = C->rmap->rend;
      const PetscInt *ii   = mm->i;
      for (i = 0; i < mr; i++) {
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i + 1] - ii[i];
        if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
        else ncoo_oown += nz;                  /* this row is local */
      }
    } else ncoo_d += mm->nz;
  }

  /*
    ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc

    ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.

    off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].

    off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
    own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
    so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.

    coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
    Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
  */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));

  /* gather (i,j) of nonzeros inserted by remote procs */
  if (hasoffproc) {
    PetscSF  msf;
    PetscInt ncoo2, *coo_i2, *coo_j2;

    PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
    PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
    PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */

    for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
      Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
      PetscInt   *idxoff = mmdata->off[cp];
      PetscInt   *idxown = mmdata->own[cp];
      if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
        const PetscInt *rmap = rmapa[cp];
        const PetscInt *cmap = cmapa[cp];
        const PetscInt *ii   = mm->i;
        PetscInt       *coi  = coo_i + ncoo_o;
        PetscInt       *coj  = coo_j + ncoo_o;
        const PetscInt  mr   = mp[cp]->rmap->n;
        const PetscInt  rs   = C->rmap->rstart;
        const PetscInt  re   = C->rmap->rend;
        const PetscInt  cs   = C->cmap->rstart;
        for (i = 0; i < mr; i++) {
          const PetscInt *jj = mm->j + ii[i];
          const PetscInt  gr = rmap[i];
          const PetscInt  nz = ii[i + 1] - ii[i];
          if (gr < rs || gr >= re) { /* this is an offproc row */
            for (j = ii[i]; j < ii[i + 1]; j++) {
              *coi++    = gr;
              *idxoff++ = j;
            }
            if (!cmapt[cp]) { /* already global */
              for (j = 0; j < nz; j++) *coj++ = jj[j];
            } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
              for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
            } else { /* offdiag */
              for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
            }
            ncoo_o += nz;
          } else { /* this is a local row */
            for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
          }
        }
      }
      /* record segment ends; off[]/own[] become CSR-like offsets into the shared arrays */
      mmdata->off[cp + 1] = idxoff;
      mmdata->own[cp + 1] = idxown;
    }

    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
    PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
    PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
    ncoo = ncoo_d + ncoo_oown + ncoo2;
    PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscFree2(coo_i, coo_j));
    /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
    PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
    coo_i = coo_i2;
    coo_j = coo_j2;
  } else { /* no offproc values insertion */
    ncoo = ncoo_d;
    PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));

    /* an empty SF still gets created so PetscSFMalloc/PetscSFFree have a valid object to work with */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(mmdata->sf));
  }
  mmdata->hasoffproc = hasoffproc;

  /* gather (i,j) of nonzeros inserted locally */
  for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
    PetscInt       *coi  = coo_i + ncoo_d;
    PetscInt       *coj  = coo_j + ncoo_d;
    const PetscInt *jj   = mm->j;
    const PetscInt *ii   = mm->i;
    const PetscInt *cmap = cmapa[cp];
    const PetscInt *rmap = rmapa[cp];
    const PetscInt  mr   = mp[cp]->rmap->n;
    const PetscInt  rs   = C->rmap->rstart;
    const PetscInt  re   = C->rmap->rend;
    const PetscInt  cs   = C->cmap->rstart;

    if (mptmp[cp]) continue;
    if (rmapt[cp] == 1) { /* consecutive rows */
      /* fill coo_i */
      for (i = 0; i < mr; i++) {
        const PetscInt gr = i + rs;
        for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
      }
      /* fill coo_j */
      if (!cmapt[cp]) { /* type-0, already global */
        PetscCall(PetscArraycpy(coj, jj, mm->nz));
      } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
        for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
      } else {                                            /* type-2, local to global for sparse columns */
        for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
      }
      ncoo_d += mm->nz;
    } else if (rmapt[cp] == 2) { /* sparse rows */
      for (i = 0; i < mr; i++) {
        const PetscInt *jj = mm->j + ii[i];
        const PetscInt  gr = rmap[i];
        const PetscInt  nz = ii[i + 1] - ii[i];
        if (gr >= rs && gr < re) { /* local rows */
          for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
          if (!cmapt[cp]) { /* type-0, already global */
            for (j = 0; j < nz; j++) *coj++ = jj[j];
          } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
            for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
          } else { /* type-2, local to global for sparse columns */
            for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
          }
          ncoo_d += nz;
        }
      }
    }
  }
  if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
  PetscCall(ISDestroy(&glob));
  if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
  PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
  /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
  PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));

  /* preallocate with COO data */
  PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
  PetscCall(PetscFree2(coo_i, coo_j));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7562 
/* Choose the backend (COO/PetscSF based) symbolic routine for AB, AtB and PtAP
   products. Without device support the backend is always used; with device
   support it is used only when A and B have the same type, neither operand is
   bound to the CPU, and the user did not request the CPU path through the
   *_backend_cpu options. Anything else falls back to the plain MPIAIJ kernels. */
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match  = PETSC_FALSE;
  PetscBool usecpu = PETSC_FALSE;
#else
  PetscBool match = PETSC_TRUE; /* no device build: backend is unconditionally eligible */
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat, 1);
#if defined(PETSC_HAVE_DEVICE)
  /* only consider the backend when both operands share a type and live on the device */
  if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
  if (match) { /* we can always fallback to the CPU if requested */
    /* the option name depends on how the product was requested: legacy API
       (MatMatMult etc.) vs the MatProduct interface */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu;
  }
#endif
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7633 
7634 /*
7635    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7636 
7637    n - the number of block indices in cc[]
7638    cc - the block indices (must be large enough to contain the indices)
7639 */
7640 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7641 {
7642   PetscInt        cnt = -1, nidx, j;
7643   const PetscInt *idx;
7644 
7645   PetscFunctionBegin;
7646   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7647   if (nidx) {
7648     cnt     = 0;
7649     cc[cnt] = idx[0] / bs;
7650     for (j = 1; j < nidx; j++) {
7651       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7652     }
7653   }
7654   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7655   *n = cnt + 1;
7656   PetscFunctionReturn(PETSC_SUCCESS);
7657 }
7658 
7659 /*
7660     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7661 
7662     ncollapsed - the number of block indices
7663     collapsed - the block indices (must be large enough to contain the indices)
7664 */
7665 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7666 {
7667   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7668 
7669   PetscFunctionBegin;
7670   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7671   for (i = start + 1; i < start + bs; i++) {
7672     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7673     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7674     cprevtmp = cprev;
7675     cprev    = merged;
7676     merged   = cprevtmp;
7677   }
7678   *ncollapsed = nprev;
7679   if (collapsed) *collapsed = cprev;
7680   PetscFunctionReturn(PETSC_SUCCESS);
7681 }
7682 
7683 /*
7684    This will eventually be folded into MatCreateGraph_AIJ() for optimal performance
7685 */
7686 static PetscErrorCode MatFilter_AIJ(Mat Gmat, PetscReal vfilter, Mat *filteredG)
7687 {
7688   PetscInt           Istart, Iend, ncols, nnz0, nnz1, NN, MM, nloc;
7689   Mat                tGmat;
7690   MPI_Comm           comm;
7691   const PetscScalar *vals;
7692   const PetscInt    *idx;
7693   PetscInt          *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols = 0;
7694   MatScalar         *AA; // this is checked in graph
7695   PetscBool          isseqaij;
7696   Mat                a, b, c;
7697   MatType            jtype;
7698 
7699   PetscFunctionBegin;
7700   PetscCall(PetscObjectGetComm((PetscObject)Gmat, &comm));
7701   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATSEQAIJ, &isseqaij));
7702   PetscCall(MatGetType(Gmat, &jtype));
7703   PetscCall(MatCreate(comm, &tGmat));
7704   PetscCall(MatSetType(tGmat, jtype));
7705 
7706   /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold?
7707                Also, if the matrix is symmetric, can we skip this
7708                operation? It can be very expensive on large matrices. */
7709 
7710   // global sizes
7711   PetscCall(MatGetSize(Gmat, &MM, &NN));
7712   PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend));
7713   nloc = Iend - Istart;
7714   PetscCall(PetscMalloc2(nloc, &d_nnz, nloc, &o_nnz));
7715   if (isseqaij) {
7716     a = Gmat;
7717     b = NULL;
7718   } else {
7719     Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
7720     a             = d->A;
7721     b             = d->B;
7722     garray        = d->garray;
7723   }
7724   /* Determine upper bound on non-zeros needed in new filtered matrix */
7725   for (PetscInt row = 0; row < nloc; row++) {
7726     PetscCall(MatGetRow(a, row, &ncols, NULL, NULL));
7727     d_nnz[row] = ncols;
7728     if (ncols > maxcols) maxcols = ncols;
7729     PetscCall(MatRestoreRow(a, row, &ncols, NULL, NULL));
7730   }
7731   if (b) {
7732     for (PetscInt row = 0; row < nloc; row++) {
7733       PetscCall(MatGetRow(b, row, &ncols, NULL, NULL));
7734       o_nnz[row] = ncols;
7735       if (ncols > maxcols) maxcols = ncols;
7736       PetscCall(MatRestoreRow(b, row, &ncols, NULL, NULL));
7737     }
7738   }
7739   PetscCall(MatSetSizes(tGmat, nloc, nloc, MM, MM));
7740   PetscCall(MatSetBlockSizes(tGmat, 1, 1));
7741   PetscCall(MatSeqAIJSetPreallocation(tGmat, 0, d_nnz));
7742   PetscCall(MatMPIAIJSetPreallocation(tGmat, 0, d_nnz, 0, o_nnz));
7743   PetscCall(MatSetOption(tGmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
7744   PetscCall(PetscFree2(d_nnz, o_nnz));
7745   //
7746   PetscCall(PetscMalloc2(maxcols, &AA, maxcols, &AJ));
7747   nnz0 = nnz1 = 0;
7748   for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7749     for (PetscInt row = 0, grow = Istart, ncol_row, jj; row < nloc; row++, grow++) {
7750       PetscCall(MatGetRow(c, row, &ncols, &idx, &vals));
7751       for (ncol_row = jj = 0; jj < ncols; jj++, nnz0++) {
7752         PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
7753         if (PetscRealPart(sv) > vfilter) {
7754           nnz1++;
7755           PetscInt cid = idx[jj] + Istart; //diag
7756           if (c != a) cid = garray[idx[jj]];
7757           AA[ncol_row] = vals[jj];
7758           AJ[ncol_row] = cid;
7759           ncol_row++;
7760         }
7761       }
7762       PetscCall(MatRestoreRow(c, row, &ncols, &idx, &vals));
7763       PetscCall(MatSetValues(tGmat, 1, &grow, ncol_row, AJ, AA, INSERT_VALUES));
7764     }
7765   }
7766   PetscCall(PetscFree2(AA, AJ));
7767   PetscCall(MatAssemblyBegin(tGmat, MAT_FINAL_ASSEMBLY));
7768   PetscCall(MatAssemblyEnd(tGmat, MAT_FINAL_ASSEMBLY));
7769   PetscCall(MatPropagateSymmetryOptions(Gmat, tGmat)); /* Normal Mat options are not relevant ? */
7770 
7771   PetscCall(PetscInfo(tGmat, "\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", (!nnz0) ? 1. : 100. * (double)nnz1 / (double)nnz0, (double)vfilter, (!nloc) ? 1. : (double)nnz0 / (double)nloc, MM, (int)maxcols));
7772 
7773   *filteredG = tGmat;
7774   PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view"));
7775   PetscFunctionReturn(PETSC_SUCCESS);
7776 }
7777 
7778 /*
7779  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7780 
7781  Input Parameter:
7782  . Amat - matrix
7783  - symmetrize - make the result symmetric
7784  + scale - scale with diagonal
7785 
7786  Output Parameter:
7787  . a_Gmat - output scalar graph >= 0
7788 
7789 */
7790 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, Mat *a_Gmat)
7791 {
7792   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7793   MPI_Comm  comm;
7794   Mat       Gmat;
7795   PetscBool ismpiaij, isseqaij;
7796   Mat       a, b, c;
7797   MatType   jtype;
7798 
7799   PetscFunctionBegin;
7800   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7801   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7802   PetscCall(MatGetSize(Amat, &MM, &NN));
7803   PetscCall(MatGetBlockSize(Amat, &bs));
7804   nloc = (Iend - Istart) / bs;
7805 
7806   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7807   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7808   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7809 
7810   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7811   /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
7812      implementation */
7813   if (bs > 1) {
7814     PetscCall(MatGetType(Amat, &jtype));
7815     PetscCall(MatCreate(comm, &Gmat));
7816     PetscCall(MatSetType(Gmat, jtype));
7817     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7818     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7819     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7820       PetscInt  *d_nnz, *o_nnz;
7821       MatScalar *aa, val, *AA;
7822       PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
7823       if (isseqaij) {
7824         a = Amat;
7825         b = NULL;
7826       } else {
7827         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7828         a             = d->A;
7829         b             = d->B;
7830       }
7831       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7832       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7833       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7834         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
7835         const PetscInt *cols1, *cols2;
7836         for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
7837           PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
7838           nnz[brow / bs] = nc2 / bs;
7839           if (nc2 % bs) ok = 0;
7840           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
7841           for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
7842             PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
7843             if (nc1 != nc2) ok = 0;
7844             else {
7845               for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
7846                 if (cols1[jj] != cols2[jj]) ok = 0;
7847                 if (cols1[jj] % bs != jj % bs) ok = 0;
7848               }
7849             }
7850             PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
7851           }
7852           PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
7853           if (!ok) {
7854             PetscCall(PetscFree2(d_nnz, o_nnz));
7855             PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
7856             goto old_bs;
7857           }
7858         }
7859       }
7860       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7861       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7862       PetscCall(PetscFree2(d_nnz, o_nnz));
7863       PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
7864       // diag
7865       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
7866         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
7867         ai               = aseq->i;
7868         n                = ai[brow + 1] - ai[brow];
7869         aj               = aseq->j + ai[brow];
7870         for (int k = 0; k < n; k += bs) {        // block columns
7871           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
7872           val        = 0;
7873           for (int ii = 0; ii < bs; ii++) { // rows in block
7874             aa = aseq->a + ai[brow + ii] + k;
7875             for (int jj = 0; jj < bs; jj++) {         // columns in block
7876               val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7877             }
7878           }
7879           PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7880           AA[k / bs] = val;
7881         }
7882         grow = Istart / bs + brow / bs;
7883         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES));
7884       }
7885       // off-diag
7886       if (ismpiaij) {
7887         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
7888         const PetscScalar *vals;
7889         const PetscInt    *cols, *garray = aij->garray;
7890         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
7891         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
7892           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
7893           for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
7894             PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
7895             AA[k / bs] = 0;
7896             AJ[cidx]   = garray[cols[k]] / bs;
7897           }
7898           nc = ncols / bs;
7899           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
7900           for (int ii = 0; ii < bs; ii++) { // rows in block
7901             PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7902             for (int k = 0; k < ncols; k += bs) {
7903               for (int jj = 0; jj < bs; jj++) { // cols in block
7904                 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7905                 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7906               }
7907             }
7908             PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7909           }
7910           grow = Istart / bs + brow / bs;
7911           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES));
7912         }
7913       }
7914       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7915       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7916       PetscCall(PetscFree2(AA, AJ));
7917     } else {
7918       const PetscScalar *vals;
7919       const PetscInt    *idx;
7920       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
7921     old_bs:
7922       /*
7923        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7924        */
7925       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
7926       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7927       if (isseqaij) {
7928         PetscInt max_d_nnz;
7929         /*
7930          Determine exact preallocation count for (sequential) scalar matrix
7931          */
7932         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
7933         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7934         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7935         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7936         PetscCall(PetscFree3(w0, w1, w2));
7937       } else if (ismpiaij) {
7938         Mat             Daij, Oaij;
7939         const PetscInt *garray;
7940         PetscInt        max_d_nnz;
7941         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
7942         /*
7943          Determine exact preallocation count for diagonal block portion of scalar matrix
7944          */
7945         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
7946         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7947         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7948         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7949         PetscCall(PetscFree3(w0, w1, w2));
7950         /*
7951          Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
7952          */
7953         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7954           o_nnz[jj] = 0;
7955           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
7956             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7957             o_nnz[jj] += ncols;
7958             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7959           }
7960           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
7961         }
7962       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
7963       /* get scalar copy (norms) of matrix */
7964       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7965       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7966       PetscCall(PetscFree2(d_nnz, o_nnz));
7967       for (Ii = Istart; Ii < Iend; Ii++) {
7968         PetscInt dest_row = Ii / bs;
7969         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
7970         for (jj = 0; jj < ncols; jj++) {
7971           PetscInt    dest_col = idx[jj] / bs;
7972           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
7973           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
7974         }
7975         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
7976       }
7977       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7978       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7979     }
7980   } else {
7981     if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
7982     else {
7983       Gmat = Amat;
7984       PetscCall(PetscObjectReference((PetscObject)Gmat));
7985     }
7986     if (isseqaij) {
7987       a = Gmat;
7988       b = NULL;
7989     } else {
7990       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
7991       a             = d->A;
7992       b             = d->B;
7993     }
7994     if (filter >= 0 || scale) {
7995       /* take absolute value of each entry */
7996       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7997         MatInfo      info;
7998         PetscScalar *avals;
7999         PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
8000         PetscCall(MatSeqAIJGetArray(c, &avals));
8001         for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
8002         PetscCall(MatSeqAIJRestoreArray(c, &avals));
8003       }
8004     }
8005   }
8006   if (symmetrize) {
8007     PetscBool isset, issym;
8008     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
8009     if (!isset || !issym) {
8010       Mat matTrans;
8011       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
8012       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
8013       PetscCall(MatDestroy(&matTrans));
8014     }
8015     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
8016   } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
8017   if (scale) {
8018     /* scale c for all diagonal values = 1 or -1 */
8019     Vec diag;
8020     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
8021     PetscCall(MatGetDiagonal(Gmat, diag));
8022     PetscCall(VecReciprocal(diag));
8023     PetscCall(VecSqrtAbs(diag));
8024     PetscCall(MatDiagonalScale(Gmat, diag, diag));
8025     PetscCall(VecDestroy(&diag));
8026   }
8027   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
8028 
8029   if (filter >= 0) {
8030     Mat Fmat = NULL; /* some silly compiler needs this */
8031 
8032     PetscCall(MatFilter_AIJ(Gmat, filter, &Fmat));
8033     PetscCall(MatDestroy(&Gmat));
8034     Gmat = Fmat;
8035   }
8036   *a_Gmat = Gmat;
8037   PetscFunctionReturn(PETSC_SUCCESS);
8038 }
8039 
8040 /*
8041     Special version for direct calls from Fortran
8042 */
8043 #include <petsc/private/fortranimpl.h>
8044 
8045 /* Change these macros so can be used in void function */
8046 /* Identical to PetscCallVoid, except it assigns to *_ierr */
8047 #undef PetscCall
8048 #define PetscCall(...) \
8049   do { \
8050     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
8051     if (PetscUnlikely(ierr_msv_mpiaij)) { \
8052       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
8053       return; \
8054     } \
8055   } while (0)
8056 
8057 #undef SETERRQ
8058 #define SETERRQ(comm, ierr, ...) \
8059   do { \
8060     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
8061     return; \
8062   } while (0)
8063 
8064 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8065   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8066 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8067   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8068 #else
8069 #endif
/*
   matsetvaluesmpiaij_ - Fortran-callable stub for MatSetValues() on MPIAIJ matrices that
   inlines the insertion logic (via the MatSetValues_SeqAIJ_[AB]_Private macros)

   All arguments arrive by reference from Fortran; errors are reported through *_ierr
   (see the PetscCall/SETERRQ redefinitions above) since the function returns void.
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat = *mmat;
  PetscInt    m = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat         A     = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    PetscBool   ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B                 = aij->B;
    Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue; /* negative row indices are ignored */
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* locally owned row: set up search state for both the A (diag) and B (off-diag) rows */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* column in the diagonal block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue; /* negative column indices are ignored */
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* column in the off-diagonal block: translate global column to B's local column */
            if (mat->was_assembled) {
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
                /* new off-process column: disassemble so B uses global column indices again */
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B        = aij->B;
                b        = (Mat_SeqAIJ *)B->data;
                bimax    = b->imax;
                bi       = b->i;
                bilen    = b->ilen;
                bj       = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* off-process row: stash the values for communication at assembly time */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}
8183 
/* Undefining these here since they were redefined from their original definition above! No
 * other PETSc functions should be defined past this point, as it is impossible to recover the
 * original definitions (this must remain the end of the file) */
#undef PetscCall
#undef SETERRQ
8189