xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision a4e35b1925eceef64945ea472b84f2bf06a67b5e)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
10 {
11   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
12 
13   PetscFunctionBegin;
14   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
15   PetscCall(MatStashDestroy_Private(&mat->stash));
16   PetscCall(VecDestroy(&aij->diag));
17   PetscCall(MatDestroy(&aij->A));
18   PetscCall(MatDestroy(&aij->B));
19 #if defined(PETSC_USE_CTABLE)
20   PetscCall(PetscHMapIDestroy(&aij->colmap));
21 #else
22   PetscCall(PetscFree(aij->colmap));
23 #endif
24   PetscCall(PetscFree(aij->garray));
25   PetscCall(VecDestroy(&aij->lvec));
26   PetscCall(VecScatterDestroy(&aij->Mvctx));
27   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
28   PetscCall(PetscFree(aij->ld));
29 
30   PetscCall(PetscFree(mat->data));
31 
32   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
33   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
34 
35   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
36   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
37   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
38   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
39   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
40   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
41   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
42   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
43   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
44   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
45 #if defined(PETSC_HAVE_CUDA)
46   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
47 #endif
48 #if defined(PETSC_HAVE_HIP)
49   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
50 #endif
51 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
52   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
53 #endif
54   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
55 #if defined(PETSC_HAVE_ELEMENTAL)
56   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
57 #endif
58 #if defined(PETSC_HAVE_SCALAPACK)
59   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
60 #endif
61 #if defined(PETSC_HAVE_HYPRE)
62   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
63   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
64 #endif
65   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
66   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
69   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
71 #if defined(PETSC_HAVE_MKL_SPARSE)
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
73 #endif
74   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
75   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
76   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
77   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
78   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
79   PetscFunctionReturn(PETSC_SUCCESS);
80 }
81 
82 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and  MatAssemblyEnd_MPI_Hash() */
83 #define TYPE AIJ
84 #define TYPE_AIJ
85 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
86 #undef TYPE
87 #undef TYPE_AIJ
88 
/*
  MatGetRowIJ_MPIAIJ - Returns compressed row storage (ia/ja) for the merged local
  rows of a parallel AIJ matrix.

  The diagonal and off-diagonal blocks are first merged into one sequential matrix B.
  Composing B onto A bumps B's reference count, so the MatDestroy() below only drops
  this function's reference: B (and hence the ia/ja arrays handed to the caller)
  stays alive until MatRestoreRowIJ_MPIAIJ() clears the composed slot.
*/
PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
  PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}
100 
/*
  MatRestoreRowIJ_MPIAIJ - Releases the ia/ja arrays obtained with MatGetRowIJ_MPIAIJ().

  Retrieves the merged local matrix B that MatGetRowIJ_MPIAIJ() composed onto A,
  restores its row structure, then clears the composed slot, which drops the last
  reference and destroys B.
*/
PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
  PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
111 
112 /*MC
113    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
114 
   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
116    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
117   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
118   for communicators controlling multiple processes.  It is recommended that you call both of
119   the above preallocation routines for simplicity.
120 
121    Options Database Key:
122 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
123 
124   Developer Note:
125   Level: beginner
126 
    Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`; the type also automatically
   switches over to use inodes when enough exist.
129 
130 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
131 M*/
132 
133 /*MC
134    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
135 
136    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
137    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
138    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
139   for communicators controlling multiple processes.  It is recommended that you call both of
140   the above preallocation routines for simplicity.
141 
142    Options Database Key:
143 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`
144 
145   Level: beginner
146 
147 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
148 M*/
149 
150 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
151 {
152   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
153 
154   PetscFunctionBegin;
155 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
156   A->boundtocpu = flg;
157 #endif
158   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
159   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
160 
161   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
162    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
163    * to differ from the parent matrix. */
164   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
165   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
166 
167   PetscFunctionReturn(PETSC_SUCCESS);
168 }
169 
170 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
171 {
172   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
173 
174   PetscFunctionBegin;
175   if (mat->A) {
176     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
177     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
178   }
179   PetscFunctionReturn(PETSC_SUCCESS);
180 }
181 
/*
  MatFindNonzeroRows_MPIAIJ - Creates an IS of the global indices of local rows that
  contain at least one numerically nonzero entry (in either the diagonal or the
  off-diagonal block).

  Output: *keptrows is NULL on every rank when the matrix has no zero rows anywhere
  (checked collectively); otherwise it is the IS of kept (nonzero) rows.

  Two passes over the rows: the first only counts the zero rows (cnt), the second
  fills the index array sized from that count.
*/
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  /* Pass 1: count rows that are structurally empty or contain only explicit zeros */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i]; /* nonzeros of row i in the diagonal block */
    nb = ib[i + 1] - ib[i]; /* nonzeros of row i in the off-diagonal block */
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++; /* structurally nonempty but all values are zero */
  ok1:;
  }
  /* Collective count of zero rows; if none anywhere, leave *keptrows NULL */
  PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  /* Pass 2: collect the global indices of the kept (nonzero) rows */
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  /* IS takes ownership of rows (PETSC_OWN_POINTER) */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(PETSC_SUCCESS);
}
250 
251 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
252 {
253   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
254   PetscBool   cong;
255 
256   PetscFunctionBegin;
257   PetscCall(MatHasCongruentLayouts(Y, &cong));
258   if (Y->assembled && cong) {
259     PetscCall(MatDiagonalSet(aij->A, D, is));
260   } else {
261     PetscCall(MatDiagonalSet_Default(Y, D, is));
262   }
263   PetscFunctionReturn(PETSC_SUCCESS);
264 }
265 
266 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
267 {
268   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
269   PetscInt    i, rstart, nrows, *rows;
270 
271   PetscFunctionBegin;
272   *zrows = NULL;
273   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
274   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
275   for (i = 0; i < nrows; i++) rows[i] += rstart;
276   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
277   PetscFunctionReturn(PETSC_SUCCESS);
278 }
279 
/*
  MatGetColumnReductions_MPIAIJ - Computes a per-column reduction (norm, sum of real
  or imaginary parts, or mean) over all rows of the parallel matrix.

  Input Parameters:
+ A    - the matrix
- type - NORM_1, NORM_2, NORM_INFINITY, or one of the REDUCTION_* values

  Output Parameter:
. reductions - array of length global-number-of-columns, same result on every rank

  Each rank accumulates contributions from its diagonal block (columns offset by
  cmap->rstart) and off-diagonal block (columns mapped through garray), then the
  per-column partial results are combined with an Allreduce (MAX for the infinity
  norm, SUM otherwise).
*/
PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  /* NOTE(review): these paired get/restore calls look like they exist only to force
     any device-side values to be synchronized to the host before the raw a_aij->a /
     b_aij->a arrays are read below — confirm */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    /* accumulate |a|^2 per column; square root taken after the Allreduce */
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    /* means divide the column sums by the global number of rows */
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
325 
/*
  MatFindOffBlockDiagonalEntries_MPIAIJ - Creates an IS of global row indices that
  contain entries outside the block diagonal.

  A row qualifies if the diagonal block reports it as off-block-diagonal (sis) or
  if it has any nonzero in the off-diagonal block (gis). The two local index lists
  are merged, sorted with duplicates removed, and shifted to global numbering.

  NOTE(review): ISGetSize() returns the global IS size; this appears to rely on
  sis/gis being sequential (built from the sequential blocks a->A / a->B), so
  global size == local size here — confirm.
*/
PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  IS              sis, gis;
  const PetscInt *isis, *igis;
  PetscInt        n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
  PetscCall(MatFindNonzeroRows(a->B, &gis));
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  /* concatenate both lists, then sort and unique them */
  PetscCall(PetscMalloc1(ngis + nsis, &iis));
  PetscCall(PetscArraycpy(iis, igis, ngis));
  PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart; /* local -> global row numbering */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(PETSC_SUCCESS);
}
356 
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each
  process has an order-N integer array) but is fast to access.
*/
/* Builds aij->colmap: global column index -> (local off-diagonal column index + 1).
   The +1 shift reserves 0 as the "column not present locally" sentinel, which is
   why lookups elsewhere subtract 1 and test for col < 0. */
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n   = aij->B->cmap->n, i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* scalable hash-table variant: store key (global col + 1) -> value (local col + 1) */
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  /* dense variant: zero-initialized array over all global columns; fast but O(N) memory */
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
380 
/* Insert or add `value` at (row,col) of the diagonal block A. Performs a coarse
   binary search (down to a window of 5) followed by a linear scan over the sorted
   row; if the entry is absent and insertion is allowed (nonew), the row is grown
   via MatSeqXAIJReallocateAIJ and later entries are shifted up. Relies on the
   caller (MatSetValues_MPIAIJ) having set up rp1/ap1/low1/high1/nrow1/rmax1/
   lastcol1/ailen/nonew/ignorezeroentries/aa/ai/aj/am. */
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    /* reuse the previous search window when columns arrive in increasing order */ \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)
426 
/* Off-diagonal-block (B) twin of MatSetValues_SeqAIJ_A_Private: same search/
   insert/reallocate logic over rp2/ap2/low2/high2/nrow2/rmax2/lastcol2/bilen and
   the B-block arrays ba/bi/bj/bimax. Note: unlike the A variant, explicit zeros
   are skipped regardless of row == col, since diagonal entries never live in B. */
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  } while (0)
471 
/*
  MatSetValuesRow_MPIAIJ - Overwrites the values of one locally-owned row, where v
  contains exactly the existing nonzeros of that row in global column order:
  first the off-diagonal entries left of the diagonal block, then the diagonal-block
  entries, then the off-diagonal entries right of the diagonal block.

  Input Parameters:
+ A   - the matrix (code only works for square matrices, so rstart == cstart)
. row - global row index (must be owned by this process)
- v   - the new values, one per existing nonzero, in global column order

  No new nonzero locations are created; only existing entries are overwritten.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray                         = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag; /* convert to local row index */
  /* l = number of off-diagonal (B) entries whose global column precedes the diagonal block */
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
509 
/*
  MatSetValues_MPIAIJ - Inserts or adds a logically dense block of values into the
  parallel matrix.

  Locally-owned rows are written directly into the diagonal (A) or off-diagonal (B)
  sequential blocks via the MatSetValues_SeqAIJ_{A,B}_Private macros above, which
  depend on many of the local variables declared here. Rows owned by other
  processes are placed in the stash and communicated at assembly time.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A     = aij->A;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B                 = aij->B;
  Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are ignored by convention */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally-owned row: set up the per-row search state used by the macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* column lies in the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            /* B stores compacted local column indices after assembly; translate via colmap */
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              /* new off-diagonal nonzero disallowed: warn-and-skip or hard error */
              if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* before first assembly B is indexed by global columns */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      /* off-process row: stash it for communication during assembly */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}
618 
619 /*
620     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
621     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
622     No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
623 */
624 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
625 {
626   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
627   Mat         A      = aij->A; /* diagonal part of the matrix */
628   Mat         B      = aij->B; /* offdiagonal part of the matrix */
629   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
630   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
631   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
632   PetscInt   *ailen = a->ilen, *aj = a->j;
633   PetscInt   *bilen = b->ilen, *bj = b->j;
634   PetscInt    am          = aij->A->rmap->n, j;
635   PetscInt    diag_so_far = 0, dnz;
636   PetscInt    offd_so_far = 0, onz;
637 
638   PetscFunctionBegin;
639   /* Iterate over all rows of the matrix */
640   for (j = 0; j < am; j++) {
641     dnz = onz = 0;
642     /*  Iterate over all non-zero columns of the current row */
643     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
644       /* If column is in the diagonal */
645       if (mat_j[col] >= cstart && mat_j[col] < cend) {
646         aj[diag_so_far++] = mat_j[col] - cstart;
647         dnz++;
648       } else { /* off-diagonal entries */
649         bj[offd_so_far++] = mat_j[col];
650         onz++;
651       }
652     }
653     ailen[j] = dnz;
654     bilen[j] = onz;
655   }
656   PetscFunctionReturn(PETSC_SUCCESS);
657 }
658 
659 /*
660     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
661     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
662     No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
663     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
664     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
665 */
666 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
667 {
668   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
669   Mat          A    = aij->A; /* diagonal part of the matrix */
670   Mat          B    = aij->B; /* offdiagonal part of the matrix */
671   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
672   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
673   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
674   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
675   PetscInt    *ailen = a->ilen, *aj = a->j;
676   PetscInt    *bilen = b->ilen, *bj = b->j;
677   PetscInt     am          = aij->A->rmap->n, j;
678   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
679   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
680   PetscScalar *aa = a->a, *ba = b->a;
681 
682   PetscFunctionBegin;
683   /* Iterate over all rows of the matrix */
684   for (j = 0; j < am; j++) {
685     dnz_row = onz_row = 0;
686     rowstart_offd     = full_offd_i[j];
687     rowstart_diag     = full_diag_i[j];
688     /*  Iterate over all non-zero columns of the current row */
689     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
690       /* If column is in the diagonal */
691       if (mat_j[col] >= cstart && mat_j[col] < cend) {
692         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
693         aa[rowstart_diag + dnz_row] = mat_a[col];
694         dnz_row++;
695       } else { /* off-diagonal entries */
696         bj[rowstart_offd + onz_row] = mat_j[col];
697         ba[rowstart_offd + onz_row] = mat_a[col];
698         onz_row++;
699       }
700     }
701     ailen[j] = dnz_row;
702     bilen[j] = onz_row;
703   }
704   PetscFunctionReturn(PETSC_SUCCESS);
705 }
706 
/*
  Retrieves values at the given global (row, column) locations into v (row-major,
  m x n). Only rows owned by this rank are supported; off-process rows error out.
  Negative row or column indices are skipped.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* column falls in the diagonal block: shift to its local numbering */
          col = idxn[j] - cstart;
          PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          /* column falls in the off-diagonal block: translate the global column to
             B's compact local numbering via the colmap (stored 1-based so 0 means "absent") */
          if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* column not in the local nonzero pattern -> the value is zero */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
          else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
741 
742 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
743 {
744   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
745   PetscInt    nstash, reallocs;
746 
747   PetscFunctionBegin;
748   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
749 
750   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
751   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
752   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
753   PetscFunctionReturn(PETSC_SUCCESS);
754 }
755 
/*
  Completes assembly: receives and inserts off-process stashed entries, assembles
  the two sequential blocks, handles disassembly consistency across ranks, and
  collectively updates the matrix nonzero state.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  /* drain the stash messages started in MatAssemblyBegin_MPIAIJ and insert them locally */
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break; /* no more incoming messages */

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  /* cached row-extraction workspace is invalidated by assembly */
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  /* cached diagonal may be stale after assembly */
  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
834 
835 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
836 {
837   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
838 
839   PetscFunctionBegin;
840   PetscCall(MatZeroEntries(l->A));
841   PetscCall(MatZeroEntries(l->B));
842   PetscFunctionReturn(PETSC_SUCCESS);
843 }
844 
/*
  Zeros the given global rows, optionally placing diag on the diagonal and fixing
  the right-hand side b so that x stays a solution for the zeroed rows.
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)A->data;
  PetscObjectState sA, sB;
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    /* b_i = diag * x_i on each zeroed local row */
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* remember block nonzero states to detect a pattern change below */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* congruent layouts: the diagonal entry lives in the diagonal block */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    nnwA, nnwB;
    PetscBool   nnzA, nnzB;

    /* save the "no new nonzeros" flags; temporarily cleared below, restored after */
    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    /* insert diag on the diagonal where one exists (global row index within the column range) */
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    /* restore the saved "no new nonzeros" flags */
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(PETSC_SUCCESS);
}
918 
/*
  Zeros both the rows AND the columns with the given global indices. A PetscSF is
  used to route the (possibly off-process) requested rows to their owners; a mask
  vector is scattered to ghost columns so the off-diagonal block can zero the
  corresponding column entries (and move their contribution to b when x/b given).
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n;
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1; /* -1 marks "not requested" */
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off diagonal part of matrix */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  /* mark the zeroed local rows, then scatter the marks to the ghost columns */
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    /* fetch ghost values of x needed to update b below */
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* column is being zeroed: move its contribution to the rhs, then zero the entry */
          if (b) bb[*ridx] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1036 
1037 PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1038 {
1039   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1040   PetscInt    nt;
1041   VecScatter  Mvctx = a->Mvctx;
1042 
1043   PetscFunctionBegin;
1044   PetscCall(VecGetLocalSize(xx, &nt));
1045   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
1046   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1047   PetscUseTypeMethod(a->A, mult, xx, yy);
1048   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1049   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1050   PetscFunctionReturn(PETSC_SUCCESS);
1051 }
1052 
1053 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1054 {
1055   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1056 
1057   PetscFunctionBegin;
1058   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1059   PetscFunctionReturn(PETSC_SUCCESS);
1060 }
1061 
1062 PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1063 {
1064   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1065   VecScatter  Mvctx = a->Mvctx;
1066 
1067   PetscFunctionBegin;
1068   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1069   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1070   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1071   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1072   PetscFunctionReturn(PETSC_SUCCESS);
1073 }
1074 
1075 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1076 {
1077   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1078 
1079   PetscFunctionBegin;
1080   /* do nondiagonal part */
1081   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1082   /* do local part */
1083   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1084   /* add partial results together */
1085   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1086   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1087   PetscFunctionReturn(PETSC_SUCCESS);
1088 }
1089 
/*
  Tests whether Bmat equals the transpose of Amat (within tol). First a cheap
  collective check of the diagonal blocks; if that passes and more than one rank
  exists, the off-diagonal parts are compared via extracted submatrices.
*/
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
  Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme;
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  /* "notme" = all global indices outside this rank's ownership range [first,last).
     NOTE(review): the array is sized N - last + first while the second fill loop
     runs up to M; this assumes M <= N (e.g. square matrices) — confirm for
     rectangular inputs. */
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  /* A(me, notme) must equal the transpose of B(notme, me) */
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1129 
1130 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f)
1131 {
1132   PetscFunctionBegin;
1133   PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f));
1134   PetscFunctionReturn(PETSC_SUCCESS);
1135 }
1136 
1137 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1138 {
1139   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1140 
1141   PetscFunctionBegin;
1142   /* do nondiagonal part */
1143   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1144   /* do local part */
1145   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1146   /* add partial results together */
1147   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1148   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1149   PetscFunctionReturn(PETSC_SUCCESS);
1150 }
1151 
1152 /*
1153   This only works correctly for square matrices where the subblock A->A is the
1154    diagonal block
1155 */
1156 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1157 {
1158   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1159 
1160   PetscFunctionBegin;
1161   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1162   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1163   PetscCall(MatGetDiagonal(a->A, v));
1164   PetscFunctionReturn(PETSC_SUCCESS);
1165 }
1166 
1167 PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1168 {
1169   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1170 
1171   PetscFunctionBegin;
1172   PetscCall(MatScale(a->A, aa));
1173   PetscCall(MatScale(a->B, aa));
1174   PetscFunctionReturn(PETSC_SUCCESS);
1175 }
1176 
1177 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1178 {
1179   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1180   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1181   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1182   const PetscInt    *garray = aij->garray;
1183   const PetscScalar *aa, *ba;
1184   PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
1185   PetscInt64         nz, hnz;
1186   PetscInt          *rowlens;
1187   PetscInt          *colidxs;
1188   PetscScalar       *matvals;
1189   PetscMPIInt        rank;
1190 
1191   PetscFunctionBegin;
1192   PetscCall(PetscViewerSetUp(viewer));
1193 
1194   M  = mat->rmap->N;
1195   N  = mat->cmap->N;
1196   m  = mat->rmap->n;
1197   rs = mat->rmap->rstart;
1198   cs = mat->cmap->rstart;
1199   nz = A->nz + B->nz;
1200 
1201   /* write matrix header */
1202   header[0] = MAT_FILE_CLASSID;
1203   header[1] = M;
1204   header[2] = N;
1205   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1206   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1207   if (rank == 0) {
1208     if (hnz > PETSC_MAX_INT) header[3] = PETSC_MAX_INT;
1209     else header[3] = (PetscInt)hnz;
1210   }
1211   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1212 
1213   /* fill in and store row lengths  */
1214   PetscCall(PetscMalloc1(m, &rowlens));
1215   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1216   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1217   PetscCall(PetscFree(rowlens));
1218 
1219   /* fill in and store column indices */
1220   PetscCall(PetscMalloc1(nz, &colidxs));
1221   for (cnt = 0, i = 0; i < m; i++) {
1222     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1223       if (garray[B->j[jb]] > cs) break;
1224       colidxs[cnt++] = garray[B->j[jb]];
1225     }
1226     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1227     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1228   }
1229   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1230   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1231   PetscCall(PetscFree(colidxs));
1232 
1233   /* fill in and store nonzero values */
1234   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1235   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1236   PetscCall(PetscMalloc1(nz, &matvals));
1237   for (cnt = 0, i = 0; i < m; i++) {
1238     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1239       if (garray[B->j[jb]] > cs) break;
1240       matvals[cnt++] = ba[jb];
1241     }
1242     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1243     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1244   }
1245   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1246   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1247   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1248   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1249   PetscCall(PetscFree(matvals));
1250 
1251   /* write block size option to the viewer's .info file */
1252   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1253   PetscFunctionReturn(PETSC_SUCCESS);
1254 }
1255 
1256 #include <petscdraw.h>
/*
  Handles viewing for ASCII, draw, binary, and socket viewers. Special ASCII
  formats (load balance, info, info-detail) and parallel binary output are
  handled directly; otherwise the whole matrix is gathered onto rank 0 and
  viewed as a sequential matrix.
*/
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt       rank = aij->rank, size = aij->size;
  PetscBool         isdraw, iascii, isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max local nonzero counts over all ranks */
      PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < (PetscInt)size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank synchronized summary of local sizes, nonzeros, and I-node use */
      MatInfo   info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isbinary) {
    if (size == 1) {
      /* a single rank can view its (entire) diagonal block directly */
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (iascii && size == 1) {
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow, iscol;

    /* rank 0 requests all rows/columns; every other rank requests none */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
    /*  The commented code uses MatCreateSubMatrices instead */
    /*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
    if (rank == 0) {
       PetscCall(PetscObjectReference((PetscObject)AA[0]));
       A    = AA[0];
       Av   = AA[0];
    }
    PetscCall(MatDestroySubMatrices(1,&AA));
*/
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqAIJ(Av, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1382 
1383 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1384 {
1385   PetscBool iascii, isdraw, issocket, isbinary;
1386 
1387   PetscFunctionBegin;
1388   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1389   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1390   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1391   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1392   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1393   PetscFunctionReturn(PETSC_SUCCESS);
1394 }
1395 
/*
  MatSOR_MPIAIJ - SOR/relaxation for MPIAIJ matrices.

  Only the "local" sweep variants (and the Eisenstat trick) are implemented:
  each process relaxes its diagonal block mat->A while the off-process
  coupling mat->B*x is folded into the right-hand side as a fixed
  contribution. A true multiplicative-across-processes SOR is not supported
  and errors out in the final else branch.

  Input Parameters:
+ matin  - the matrix
. bb     - right-hand side
. omega  - relaxation factor
. flag   - MatSORType bit flags selecting the sweep variant
. fshift - diagonal shift passed through to the sequential SOR
. its    - number of outer (parallel) iterations
- lits   - number of local iterations handed to the sequential SOR

  Output Parameter:
. xx - the iterate (also the initial guess unless SOR_ZERO_INITIAL_GUESS is set)
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec         bb1 = NULL;
  PetscBool   hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    /* Applying the upper-triangular part is purely local: delegate to the diagonal block */
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* bb1 holds the modified right-hand side bb - B*x. It is needed whenever we iterate
     more than once, start from a nonzero guess (note: ~flag & SOR_ZERO_INITIAL_GUESS
     tests that the zero-initial-guess bit is NOT set), or use the Eisenstat variant. */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* With a zero guess, B*x = 0, so the first iteration needs no scatter or rhs update */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      /* Gather the off-process entries of xx needed to apply B */
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    /* Backward local sweep from a zero guess produces the first half-iterate in xx */
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    /* Lazily cache the diagonal of the whole matrix; reused on subsequent calls */
    if (!mat->diag) {
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    /* Prefer the matrix's own diagonal-block multiply if it provides one */
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    /* bb1 = bb + ((omega-2)/omega) * D*xx, the Eisenstat-modified right-hand side */
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    /* add the off-process coupling: bb1 += B * xx(ghost) */
    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    /* combine the two half-sweeps */
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  /* Propagate any zero/small-pivot trouble detected in the local sweeps */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1492 
/*
  MatPermute_MPIAIJ - Produces B = P_r * A * P_c for permutations given as index sets.

  Strategy: use star forests (PetscSF) to invert the row and column permutations
  (finding where each locally owned row/column lands), translate the ghost-column
  map the same way, count diagonal/off-diagonal nonzeros per destination row for
  exact preallocation, then fill the result with MatSetValues().

  Input Parameters:
+ A    - the matrix to permute
. rowp - row permutation (rwant[i] = global row that should become local row i's image)
- colp - column permutation, same convention

  Output Parameter:
. B - the permuted matrix
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL;
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  /* work is scratch for both row (m) and column (n) reductions, hence max(m,n) */
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go: broadcast each ghost column's new global index */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* Count destination-row nonzeros: dnnz/onnz are counts for MY rows' images,
     tdnnz/tonnz are those counts delivered to the process that owns the image row */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  /* NOTE(review): parcolp is never assigned in this function, so this destroy is dead
     code here — presumably left over from a variant that converted a sequential colp
     into a parallel one; confirm before removing */
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1598 
1599 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1600 {
1601   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1602 
1603   PetscFunctionBegin;
1604   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1605   if (ghosts) *ghosts = aij->garray;
1606   PetscFunctionReturn(PETSC_SUCCESS);
1607 }
1608 
1609 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1610 {
1611   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1612   Mat            A = mat->A, B = mat->B;
1613   PetscLogDouble isend[5], irecv[5];
1614 
1615   PetscFunctionBegin;
1616   info->block_size = 1.0;
1617   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1618 
1619   isend[0] = info->nz_used;
1620   isend[1] = info->nz_allocated;
1621   isend[2] = info->nz_unneeded;
1622   isend[3] = info->memory;
1623   isend[4] = info->mallocs;
1624 
1625   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1626 
1627   isend[0] += info->nz_used;
1628   isend[1] += info->nz_allocated;
1629   isend[2] += info->nz_unneeded;
1630   isend[3] += info->memory;
1631   isend[4] += info->mallocs;
1632   if (flag == MAT_LOCAL) {
1633     info->nz_used      = isend[0];
1634     info->nz_allocated = isend[1];
1635     info->nz_unneeded  = isend[2];
1636     info->memory       = isend[3];
1637     info->mallocs      = isend[4];
1638   } else if (flag == MAT_GLOBAL_MAX) {
1639     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1640 
1641     info->nz_used      = irecv[0];
1642     info->nz_allocated = irecv[1];
1643     info->nz_unneeded  = irecv[2];
1644     info->memory       = irecv[3];
1645     info->mallocs      = irecv[4];
1646   } else if (flag == MAT_GLOBAL_SUM) {
1647     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1648 
1649     info->nz_used      = irecv[0];
1650     info->nz_allocated = irecv[1];
1651     info->nz_unneeded  = irecv[2];
1652     info->memory       = irecv[3];
1653     info->mallocs      = irecv[4];
1654   }
1655   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1656   info->fill_ratio_needed = 0;
1657   info->factor_mallocs    = 0;
1658   PetscFunctionReturn(PETSC_SUCCESS);
1659 }
1660 
/*
  MatSetOption_MPIAIJ - Type-specific handling of MatSetOption() for MPIAIJ.

  Options fall into a few groups: those forwarded to both sequential blocks,
  those recorded only in the MPIAIJ struct, those deliberately ignored, and
  those fully handled by the generic MatSetOption() before this routine runs.
*/
PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* Options that only make sense once storage exists: forward to both local blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  /* Recorded locally (affects how MatSetValues interprets input) and forwarded */
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  /* Not meaningful for this type; note it and move on */
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  /* Suppresses stashing of off-process entries during assembly */
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1714 
/*
  MatGetRow_MPIAIJ - Returns one locally owned row with globally numbered,
  sorted column indices.

  The row is assembled by merging the diagonal-block (A) entries with the
  off-diagonal-block (B) entries: B entries whose global column is below the
  diagonal range come first, then all A entries (shifted by cstart), then the
  remaining B entries. A and B rows are assumed individually sorted, so the
  result is sorted without an explicit sort.

  Must be paired with MatRestoreRow_MPIAIJ(); only one row may be active at a
  time (guarded by getrowactive). idx and/or v may be NULL if not wanted.
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
  PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    max = 1, tmp;
    for (i = 0; i < matin->rmap->n; i++) {
      /* combined length of row i in both blocks */
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  /* Request from the blocks only what the caller asked for (NULL suppresses) */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  /* garray maps compressed B columns to global column numbers */
  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1; /* number of B entries that precede the diagonal block */
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries with global column < cstart come first */
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying values */
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1798 
1799 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1800 {
1801   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1802 
1803   PetscFunctionBegin;
1804   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1805   aij->getrowactive = PETSC_FALSE;
1806   PetscFunctionReturn(PETSC_SUCCESS);
1807 }
1808 
/*
  MatNorm_MPIAIJ - Computes Frobenius, 1-, or infinity-norm of an MPIAIJ matrix.

  On a single rank the computation is delegated to the sequential block.
  Otherwise each rank walks the raw value arrays of its diagonal (A) and
  off-diagonal (B) blocks and the partial results are combined with an
  Allreduce. NORM_2 is not supported for this type.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
  PetscInt         i, j, cstart = mat->cmap->rstart;
  PetscReal        sum = 0.0;
  const MatScalar *v, *amata, *bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single rank: the diagonal block is the whole matrix */
    PetscCall(MatNorm(aij->A, type, norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both blocks, reduce, then take the square root */
      v = amata;
      for (i = 0; i < amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v = bmata;
      for (i = 0; i < bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      /* accumulate |a_ij| per GLOBAL column (tmp has length cmap->N), sum across
         ranks, then take the maximum column total */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = aij->garray;
      PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
      *norm = 0.0;
      v     = amata;
      jj    = amat->j;
      for (j = 0; j < amat->nz; j++) {
        /* diagonal-block columns are local; shift by cstart to get the global index */
        tmp[cstart + *jj++] += PetscAbsScalar(*v);
        v++;
      }
      v  = bmata;
      jj = bmat->j;
      for (j = 0; j < bmat->nz; j++) {
        /* off-diagonal columns are compressed; garray maps them to global indices */
        tmp[garray[*jj++]] += PetscAbsScalar(*v);
        v++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are wholly local, so take a local max row sum and reduce with MAX */
      PetscReal ntemp = 0.0;
      for (j = 0; j < aij->A->rmap->n; j++) {
        v   = amata + amat->i[j];
        sum = 0.0;
        for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        v = bmata + bmat->i[j];
        for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1886 
/*
  MatTranspose_MPIAIJ - Forms the transpose of an MPIAIJ matrix.

  For MAT_INITIAL_MATRIX (or in-place), the result is created with exact
  preallocation computed from column counts of the local blocks; off-diagonal
  counts are routed to the owning ranks with a PetscSF. The diagonal block is
  then transposed locally (fast path, no MatSetValues), while the off-diagonal
  block is scattered with MatSetValues, relying on the stash for off-process
  entries. For in-place transpose (reuse == MAT_INPLACE_MATRIX, i.e. neither
  INITIAL nor REUSE at the end), the new matrix replaces A via MatHeaderMerge.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
  Mat              B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  ma = A->rmap->n;
  na = A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt            *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode         *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation: column counts of A become row counts of A^T */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions (counts per compressed B column) */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global: reduce the per-ghost-column counts onto the owning ranks */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    /* B = A^T has swapped sizes, block sizes, and A's type */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    B = *matout;
    /* reusing an existing transpose: its pattern must already match */
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ *)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part: each nonzero (row, bj) of B becomes (garray[bj], row) of A^T,
     inserted column-by-column via MatSetValues (one global column per local row of B) */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    ncol = bi[i + 1] - bi[i];
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    pbv += ncol;
    cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place: replace A's guts with B's */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1980 
/*
  MatDiagonalScale_MPIAIJ - Computes mat = diag(ll) * mat * diag(rr).

  Left scaling is purely local (rows are owned locally). Right scaling of the
  off-diagonal block needs ghost values of rr, so the scatter is started first
  and finished only after the local scaling work, overlapping communication
  with computation. Either ll or rr may be NULL to skip that side.
*/
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         a = aij->A, b = aij->B;
  PetscInt    s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    /* left-scale the off-diagonal block; its rows are local so no communication needed */
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale  the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2010 
2011 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2012 {
2013   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2014 
2015   PetscFunctionBegin;
2016   PetscCall(MatSetUnfactored(a->A));
2017   PetscFunctionReturn(PETSC_SUCCESS);
2018 }
2019 
2020 PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2021 {
2022   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2023   Mat         a, b, c, d;
2024   PetscBool   flg;
2025 
2026   PetscFunctionBegin;
2027   a = matA->A;
2028   b = matA->B;
2029   c = matB->A;
2030   d = matB->B;
2031 
2032   PetscCall(MatEqual(a, c, &flg));
2033   if (flg) PetscCall(MatEqual(b, d, &flg));
2034   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2035   PetscFunctionReturn(PETSC_SUCCESS);
2036 }
2037 
2038 PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2039 {
2040   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2041   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2042 
2043   PetscFunctionBegin;
2044   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2045   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2046     /* because of the column compression in the off-processor part of the matrix a->B,
2047        the number of columns in a->B and b->B may be different, hence we cannot call
2048        the MatCopy() directly on the two parts. If need be, we can provide a more
2049        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2050        then copying the submatrices */
2051     PetscCall(MatCopy_Basic(A, B, str));
2052   } else {
2053     PetscCall(MatCopy(a->A, b->A, str));
2054     PetscCall(MatCopy(a->B, b->B, str));
2055   }
2056   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2057   PetscFunctionReturn(PETSC_SUCCESS);
2058 }
2059 
2060 /*
2061    Computes the number of nonzeros per row needed for preallocation when X and Y
2062    have different nonzero structure.
2063 */
2064 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2065 {
2066   PetscInt i, j, k, nzx, nzy;
2067 
2068   PetscFunctionBegin;
2069   /* Set the number of nonzeros in the new matrix */
2070   for (i = 0; i < m; i++) {
2071     const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i];
2072     nzx    = xi[i + 1] - xi[i];
2073     nzy    = yi[i + 1] - yi[i];
2074     nnz[i] = 0;
2075     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2076       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2077       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2078       nnz[i]++;
2079     }
2080     for (; k < nzy; k++) nnz[i]++;
2081   }
2082   PetscFunctionReturn(PETSC_SUCCESS);
2083 }
2084 
2085 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2086 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2087 {
2088   PetscInt    m = Y->rmap->N;
2089   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2090   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2091 
2092   PetscFunctionBegin;
2093   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2094   PetscFunctionReturn(PETSC_SUCCESS);
2095 }
2096 
/*
  MatAXPY_MPIAIJ - Computes Y = a*X + Y.

  With SAME_NONZERO_PATTERN the operation is applied block-wise.
  SUBSET_NONZERO_PATTERN falls through to the generic implementation.
  Otherwise a new matrix with the union pattern is preallocated exactly,
  filled by the generic kernel, and swapped into Y via MatHeaderMerge (so the
  caller's Y handle is preserved).
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscCall(MatAXPY(yy->A, a, xx->A, str));
    PetscCall(MatAXPY(yy->B, a, xx->B, str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o;

    /* preallocate B with the union pattern of X and Y, per local row */
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
    PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
    PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
    /* off-diagonal blocks use compressed columns, so compare via the garray maps */
    PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    /* replace Y's contents with B's while keeping the caller's Y handle valid */
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2127 
2128 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2129 
2130 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2131 {
2132   PetscFunctionBegin;
2133   if (PetscDefined(USE_COMPLEX)) {
2134     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2135 
2136     PetscCall(MatConjugate_SeqAIJ(aij->A));
2137     PetscCall(MatConjugate_SeqAIJ(aij->B));
2138   }
2139   PetscFunctionReturn(PETSC_SUCCESS);
2140 }
2141 
2142 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2143 {
2144   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2145 
2146   PetscFunctionBegin;
2147   PetscCall(MatRealPart(a->A));
2148   PetscCall(MatRealPart(a->B));
2149   PetscFunctionReturn(PETSC_SUCCESS);
2150 }
2151 
2152 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2153 {
2154   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2155 
2156   PetscFunctionBegin;
2157   PetscCall(MatImaginaryPart(a->A));
2158   PetscCall(MatImaginaryPart(a->B));
2159   PetscFunctionReturn(PETSC_SUCCESS);
2160 }
2161 
2162 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2163 {
2164   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2165   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2166   PetscScalar       *va, *vv;
2167   Vec                vB, vA;
2168   const PetscScalar *vb;
2169 
2170   PetscFunctionBegin;
2171   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
2172   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2173 
2174   PetscCall(VecGetArrayWrite(vA, &va));
2175   if (idx) {
2176     for (i = 0; i < m; i++) {
2177       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2178     }
2179   }
2180 
2181   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
2182   PetscCall(PetscMalloc1(m, &idxb));
2183   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2184 
2185   PetscCall(VecGetArrayWrite(v, &vv));
2186   PetscCall(VecGetArrayRead(vB, &vb));
2187   for (i = 0; i < m; i++) {
2188     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2189       vv[i] = vb[i];
2190       if (idx) idx[i] = a->garray[idxb[i]];
2191     } else {
2192       vv[i] = va[i];
2193       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2194     }
2195   }
2196   PetscCall(VecRestoreArrayWrite(vA, &vv));
2197   PetscCall(VecRestoreArrayWrite(vA, &va));
2198   PetscCall(VecRestoreArrayRead(vB, &vb));
2199   PetscCall(PetscFree(idxb));
2200   PetscCall(VecDestroy(&vA));
2201   PetscCall(VecDestroy(&vB));
2202   PetscFunctionReturn(PETSC_SUCCESS);
2203 }
2204 
/* v[r] = min_j |A[r,j]| over global row r, counting implicit (unstored) zeros in the
   off-diagonal part; optionally idx[r] = global column attaining the minimum, ties
   broken toward the smaller column */
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* compressed off-diagonal column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything is in the diagonal block; compute directly into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* this rank owns no columns: every row entry here is an implicit zero */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = 0.0;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zeros in the off-diagonal part */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse: an implicit 0.0 exists, so the off-diagonal min |.| starts at 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          /* empty off-diagonal row: first implicit zero is column 0, or just past this rank's block */
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over this rank's diagonal column range */
        }
      }
    }

    /* scan the stored entries of this off-diagonal row for a smaller |value| */
    for (j = 0; j < ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* combine the diagonal-block and off-diagonal-block minima */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      /* tie: prefer the smaller global column index */
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2320 
/* v[r] = min_j (real part of) A[r,j] over global row r, counting implicit (unstored) zeros
   in the off-diagonal part; optionally idx[r] = global column attaining the minimum, ties
   broken toward the smaller column */
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* compressed off-diagonal column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything is in the diagonal block; compute directly into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* this rank owns no columns: identity element for min */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zeros in the off-diagonal part */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse: an implicit 0.0 exists, so the off-diagonal minimum is 0.0 or lower */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          /* empty off-diagonal row: first implicit zero is column 0, or just past this rank's block */
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over this rank's diagonal column range */
        }
      }
    }

    /* scan the stored entries of this off-diagonal row for a smaller value */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* combine the diagonal-block and off-diagonal-block minima */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      /* tie: prefer the smaller global column index */
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2436 
/* v[r] = max_j (real part of) A[r,j] over global row r, counting implicit (unstored) zeros
   in the off-diagonal part; optionally idx[r] = global column attaining the maximum, ties
   broken toward the smaller column */
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* compressed off-diagonal column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything is in the diagonal block; compute directly into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* this rank owns no columns: identity element for max */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zeros in the off-diagonal part */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          /* empty off-diagonal row: first implicit zero is column 0, or just past this rank's block */
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over this rank's diagonal column range */
        }
      }
    }

    /* scan the stored entries of this off-diagonal row for a larger value */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* combine the diagonal-block and off-diagonal-block maxima */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      /* tie: prefer the smaller global column index */
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2552 
2553 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2554 {
2555   Mat *dummy;
2556 
2557   PetscFunctionBegin;
2558   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2559   *newmat = *dummy;
2560   PetscCall(PetscFree(dummy));
2561   PetscFunctionReturn(PETSC_SUCCESS);
2562 }
2563 
2564 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2565 {
2566   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2567 
2568   PetscFunctionBegin;
2569   PetscCall(MatInvertBlockDiagonal(a->A, values));
2570   A->factorerrortype = a->A->factorerrortype;
2571   PetscFunctionReturn(PETSC_SUCCESS);
2572 }
2573 
2574 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2575 {
2576   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2577 
2578   PetscFunctionBegin;
2579   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2580   PetscCall(MatSetRandom(aij->A, rctx));
2581   if (x->assembled) {
2582     PetscCall(MatSetRandom(aij->B, rctx));
2583   } else {
2584     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2585   }
2586   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2587   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2588   PetscFunctionReturn(PETSC_SUCCESS);
2589 }
2590 
2591 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2592 {
2593   PetscFunctionBegin;
2594   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2595   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2596   PetscFunctionReturn(PETSC_SUCCESS);
2597 }
2598 
2599 /*@
2600   MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2601 
2602   Not Collective
2603 
2604   Input Parameter:
2605 . A - the matrix
2606 
2607   Output Parameter:
2608 . nz - the number of nonzeros
2609 
2610   Level: advanced
2611 
2612 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2613 @*/
2614 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2615 {
2616   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2617   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2618   PetscBool   isaij;
2619 
2620   PetscFunctionBegin;
2621   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
2622   PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
2623   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2624   PetscFunctionReturn(PETSC_SUCCESS);
2625 }
2626 
2627 /*@
2628   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2629 
2630   Collective
2631 
2632   Input Parameters:
2633 + A  - the matrix
2634 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)
2635 
2636   Level: advanced
2637 
2638 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2639 @*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation if the matrix type provides one;
     silently a no-op for types that do not compose this method */
  PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2646 
2647 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
2648 {
2649   PetscBool sc = PETSC_FALSE, flg;
2650 
2651   PetscFunctionBegin;
2652   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2653   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2654   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2655   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2656   PetscOptionsHeadEnd();
2657   PetscFunctionReturn(PETSC_SUCCESS);
2658 }
2659 
2660 PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
2661 {
2662   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
2663   Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;
2664 
2665   PetscFunctionBegin;
2666   if (!Y->preallocated) {
2667     PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
2668   } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
2669     PetscInt nonew = aij->nonew;
2670     PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
2671     aij->nonew = nonew;
2672   }
2673   PetscCall(MatShift_Basic(Y, a));
2674   PetscFunctionReturn(PETSC_SUCCESS);
2675 }
2676 
2677 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2678 {
2679   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2680 
2681   PetscFunctionBegin;
2682   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2683   PetscCall(MatMissingDiagonal(a->A, missing, d));
2684   if (d) {
2685     PetscInt rstart;
2686     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2687     *d += rstart;
2688   }
2689   PetscFunctionReturn(PETSC_SUCCESS);
2690 }
2691 
2692 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2693 {
2694   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2695 
2696   PetscFunctionBegin;
2697   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2698   PetscFunctionReturn(PETSC_SUCCESS);
2699 }
2700 
2701 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
2702 {
2703   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2704 
2705   PetscFunctionBegin;
2706   PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2707   PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2708   PetscFunctionReturn(PETSC_SUCCESS);
2709 }
2710 
/* Function-pointer dispatch table for MATMPIAIJ. Entries are POSITIONAL: each slot
   corresponds to a fixed index in struct _MatOps (the slot numbers appear in the
   interspersed comments), so entries must never be reordered; NULL marks operations
   this type does not implement. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIAIJ};
2863 
2864 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2865 {
2866   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2867 
2868   PetscFunctionBegin;
2869   PetscCall(MatStoreValues(aij->A));
2870   PetscCall(MatStoreValues(aij->B));
2871   PetscFunctionReturn(PETSC_SUCCESS);
2872 }
2873 
2874 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2875 {
2876   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2877 
2878   PetscFunctionBegin;
2879   PetscCall(MatRetrieveValues(aij->A));
2880   PetscCall(MatRetrieveValues(aij->B));
2881   PetscFunctionReturn(PETSC_SUCCESS);
2882 }
2883 
/* Preallocate the diagonal (d_nz/d_nnz) and off-diagonal (o_nz/o_nnz) local blocks,
   rebuilding both sequential matrices and discarding any previous communication setup */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscMPIInt size;

  PetscFunctionBegin;
  if (B->hash_active) {
    /* leave hash-based MatSetValues mode: restore the cached ops table */
    B->ops[0]      = b->cops;
    B->hash_active = PETSC_FALSE;
  }
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

  /* drop the stale column map, ghost list, and scatter; they will be rebuilt at assembly */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* off-diagonal block: until assembly it is addressed with GLOBAL column indices,
     hence width cmap->N; on one rank there is no off-diagonal part at all */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));

  /* diagonal block: local rows by locally owned columns */
  PetscCall(MatDestroy(&b->A));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
  PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
  PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
  PetscCall(MatSetType(b->A, MATSEQAIJ));

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2926 
2927 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2928 {
2929   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2930 
2931   PetscFunctionBegin;
2932   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2933   PetscCall(PetscLayoutSetUp(B->rmap));
2934   PetscCall(PetscLayoutSetUp(B->cmap));
2935 
2936 #if defined(PETSC_USE_CTABLE)
2937   PetscCall(PetscHMapIDestroy(&b->colmap));
2938 #else
2939   PetscCall(PetscFree(b->colmap));
2940 #endif
2941   PetscCall(PetscFree(b->garray));
2942   PetscCall(VecDestroy(&b->lvec));
2943   PetscCall(VecScatterDestroy(&b->Mvctx));
2944 
2945   PetscCall(MatResetPreallocation(b->A));
2946   PetscCall(MatResetPreallocation(b->B));
2947   B->preallocated  = PETSC_TRUE;
2948   B->was_assembled = PETSC_FALSE;
2949   B->assembled     = PETSC_FALSE;
2950   PetscFunctionReturn(PETSC_SUCCESS);
2951 }
2952 
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  /* Duplicate an MPIAIJ matrix: create a new parallel matrix with the same layout and
     type, then copy the internal maps and the two sequential blocks (values copied or
     not according to cpvalues, which is forwarded to the blocks' own MatDuplicate) */
  Mat         mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES; /* the duplicate starts with no pending insertions */
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* MatGetRow() scratch space is allocated lazily, so the duplicate starts empty */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  /* Copy the global-to-local column map of the off-diagonal block, if it exists */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
  } else a->colmap = NULL;
  /* Copy the local-to-global column map (garray) of the off-diagonal block */
  if (oldmat->garray) {
    PetscInt len;
    len = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len + 1, &a->garray));
    if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
  if (oldmat->Mvctx) PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  /* Carry over composed functions (e.g. type-specific methods registered on matin) */
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
3008 
3009 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3010 {
3011   PetscBool isbinary, ishdf5;
3012 
3013   PetscFunctionBegin;
3014   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3015   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3016   /* force binary viewer to load .info file if it has not yet done so */
3017   PetscCall(PetscViewerSetUp(viewer));
3018   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3019   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3020   if (isbinary) {
3021     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3022   } else if (ishdf5) {
3023 #if defined(PETSC_HAVE_HDF5)
3024     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3025 #else
3026     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3027 #endif
3028   } else {
3029     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3030   }
3031   PetscFunctionReturn(PETSC_SUCCESS);
3032 }
3033 
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  /* Load an MPIAIJ matrix from a PETSc binary viewer.
     On-disk layout: header[4] = {MAT_FILE_CLASSID, M, N, nz}, then M row lengths,
     then nz column indices, then nz scalar values. All reads are collective. */
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  /* a negative nz marks a special on-disk format that this reader does not handle */
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  /* prefix-sum the row lengths in place to obtain CSR row offsets */
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  /* NOTE(review): nz == PETSC_MAX_INT appears to mark an unavailable total count,
     in which case the global consistency check is skipped -- confirm against writer */
  if (nz != PETSC_MAX_INT) {
    PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
    PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  }

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3086 
3087 /* Not scalable because of ISAllGather() unless getting all columns. */
3088 PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3089 {
3090   IS          iscol_local;
3091   PetscBool   isstride;
3092   PetscMPIInt lisstride = 0, gisstride;
3093 
3094   PetscFunctionBegin;
3095   /* check if we are grabbing all columns*/
3096   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3097 
3098   if (isstride) {
3099     PetscInt start, len, mstart, mlen;
3100     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3101     PetscCall(ISGetLocalSize(iscol, &len));
3102     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3103     if (mstart == start && mlen - mstart == len) lisstride = 1;
3104   }
3105 
3106   PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3107   if (gisstride) {
3108     PetscInt N;
3109     PetscCall(MatGetSize(mat, NULL, &N));
3110     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3111     PetscCall(ISSetIdentity(iscol_local));
3112     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3113   } else {
3114     PetscInt cbs;
3115     PetscCall(ISGetBlockSize(iscol, &cbs));
3116     PetscCall(ISAllGather(iscol, &iscol_local));
3117     PetscCall(ISSetBlockSize(iscol_local, cbs));
3118   }
3119 
3120   *isseq = iscol_local;
3121   PetscFunctionReturn(PETSC_SUCCESS);
3122 }
3123 
3124 /*
3125  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3126  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3127 
3128  Input Parameters:
3129 +   mat - matrix
3130 .   isrow - parallel row index set; its local indices are a subset of local columns of `mat`,
3131            i.e., mat->rstart <= isrow[i] < mat->rend
3132 -   iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
3133            i.e., mat->cstart <= iscol[i] < mat->cend
3134 
3135  Output Parameters:
3136 +   isrow_d - sequential row index set for retrieving mat->A
3137 .   iscol_d - sequential  column index set for retrieving mat->A
3138 .   iscol_o - sequential column index set for retrieving mat->B
3139 -   garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
3140  */
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
{
  /* Split parallel (isrow, iscol) into sequential index sets addressing the diagonal
     block (isrow_d, iscol_d) and the off-diagonal block (iscol_o) of mat, plus a map
     garray giving the submatrix column of each iscol_o entry. Off-process columns are
     discovered by scattering marker vectors through the matrix's own Mvctx scatter. */
  Vec             x, cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray, *cmaparray;
  PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
  Mat             B    = a->B;
  Vec             lvec = a->lvec, lcmap;
  PetscInt        i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm        comm;
  VecScatter      Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0));
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices: exclusive prefix sum gives this process's offset within iscol */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  /* Mark selected columns: x carries the global column index, cmap its submatrix column */
  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
    idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d (takes ownership of idx); i is reused to carry the block size */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i));
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d: shift the (locally owned) row indices to local numbering */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices: entries that stayed at -1 were not selected */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  for (i = 0; i < Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  /* caller takes ownership of cmap1 (freed via *garray) */
  *garray = cmap1;

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3237 
3238 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
{
  /* Extract a parallel submatrix when isrow and iscol follow mat's own row/column
     distribution: the diagonal and off-diagonal blocks can then be subselected locally.
     The sequential index sets are composed on the submatrix so MAT_REUSE_MATRIX calls
     can retrieve them without recomputation. */
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat         M = NULL;
  MPI_Comm    comm;
  IS          iscol_d, isrow_d, iscol_o;
  Mat         Asub = NULL, Bsub = NULL;
  PetscInt    n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));

    /* Create submatrix M; Asub and Bsub are absorbed and must not be used afterwards */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ *)M->data;

    PetscCall(ISGetLocalSize(iscol_o, &BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt        i, j, *idx_new, *subgarray = asub->garray;
      PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));

      /* Walk both sorted column maps, keeping only iscol_o entries whose global
         column survived in the condensed Bsub */
      PetscCall(PetscMalloc1(n, &idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o, &idx));
      for (i = 0; i < n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o, &idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3330 
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  /* Top-level submatrix extraction for MPIAIJ. Dispatches to specialized paths when
     isrow (and possibly iscol) match mat's own distribution, falling back to the
     non-scalable gather-based implementation otherwise. */
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* On reuse, the composed objects on *newmat record which path built it */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      /* local indices must all fall inside this process's row ownership range */
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol, &i, &j));
      PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* The specialized paths require agreement on every process */
    PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
    PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
        PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol, &i));
        PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);

        PetscCall(ISSorted(iscol_local, &sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
        /* unsorted iscol_local falls through to the general path below */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
  }

  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* keep the gathered IS on the submatrix for later MAT_REUSE_MATRIX calls */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3430 
3431 /*@C
3432   MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3433   and "off-diagonal" part of the matrix in CSR format.
3434 
3435   Collective
3436 
3437   Input Parameters:
3438 + comm   - MPI communicator
3439 . A      - "diagonal" portion of matrix
3440 . B      - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3441 - garray - global index of `B` columns
3442 
3443   Output Parameter:
3444 . mat - the matrix, with input `A` as its local diagonal matrix
3445 
3446   Level: advanced
3447 
3448   Notes:
3449   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3450 
3451   `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore.
3452 
3453 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3454 @*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat: sum of the local diagonal-block widths */
  PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat (ownership transfers; caller must not use A) */
  maij->A = A;

  /* Translate B's compact local column indices to global indices, in place */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew shares B's i/j/a arrays */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* Transfer array ownership from B to Bnew before destroying B, so the shared
     i/j/a arrays are not freed here */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3525 
3526 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3527 
3528 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
3529 {
3530   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
3531   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3532   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
3533   Mat             M, Msub, B = a->B;
3534   MatScalar      *aa;
3535   Mat_SeqAIJ     *aij;
3536   PetscInt       *garray = a->garray, *colsub, Ncols;
3537   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
3538   IS              iscol_sub, iscmap;
3539   const PetscInt *is_idx, *cmap;
3540   PetscBool       allcolumns = PETSC_FALSE;
3541   MPI_Comm        comm;
3542 
3543   PetscFunctionBegin;
3544   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3545   if (call == MAT_REUSE_MATRIX) {
3546     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3547     PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
3548     PetscCall(ISGetLocalSize(iscol_sub, &count));
3549 
3550     PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
3551     PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");
3552 
3553     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
3554     PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3555 
3556     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
3557 
3558   } else { /* call == MAT_INITIAL_MATRIX) */
3559     PetscBool flg;
3560 
3561     PetscCall(ISGetLocalSize(iscol, &n));
3562     PetscCall(ISGetSize(iscol, &Ncols));
3563 
3564     /* (1) iscol -> nonscalable iscol_local */
3565     /* Check for special case: each processor gets entire matrix columns */
3566     PetscCall(ISIdentity(iscol_local, &flg));
3567     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3568     PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3569     if (allcolumns) {
3570       iscol_sub = iscol_local;
3571       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3572       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
3573 
3574     } else {
3575       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3576       PetscInt *idx, *cmap1, k;
3577       PetscCall(PetscMalloc1(Ncols, &idx));
3578       PetscCall(PetscMalloc1(Ncols, &cmap1));
3579       PetscCall(ISGetIndices(iscol_local, &is_idx));
3580       count = 0;
3581       k     = 0;
3582       for (i = 0; i < Ncols; i++) {
3583         j = is_idx[i];
3584         if (j >= cstart && j < cend) {
3585           /* diagonal part of mat */
3586           idx[count]     = j;
3587           cmap1[count++] = i; /* column index in submat */
3588         } else if (Bn) {
3589           /* off-diagonal part of mat */
3590           if (j == garray[k]) {
3591             idx[count]     = j;
3592             cmap1[count++] = i; /* column index in submat */
3593           } else if (j > garray[k]) {
3594             while (j > garray[k] && k < Bn - 1) k++;
3595             if (j == garray[k]) {
3596               idx[count]     = j;
3597               cmap1[count++] = i; /* column index in submat */
3598             }
3599           }
3600         }
3601       }
3602       PetscCall(ISRestoreIndices(iscol_local, &is_idx));
3603 
3604       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
3605       PetscCall(ISGetBlockSize(iscol, &cbs));
3606       PetscCall(ISSetBlockSize(iscol_sub, cbs));
3607 
3608       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
3609     }
3610 
3611     /* (3) Create sequential Msub */
3612     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
3613   }
3614 
3615   PetscCall(ISGetLocalSize(iscol_sub, &count));
3616   aij = (Mat_SeqAIJ *)(Msub)->data;
3617   ii  = aij->i;
3618   PetscCall(ISGetIndices(iscmap, &cmap));
3619 
3620   /*
3621       m - number of local rows
3622       Ncols - number of columns (same on all processors)
3623       rstart - first row in new global matrix generated
3624   */
3625   PetscCall(MatGetSize(Msub, &m, NULL));
3626 
3627   if (call == MAT_INITIAL_MATRIX) {
3628     /* (4) Create parallel newmat */
3629     PetscMPIInt rank, size;
3630     PetscInt    csize;
3631 
3632     PetscCallMPI(MPI_Comm_size(comm, &size));
3633     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3634 
3635     /*
3636         Determine the number of non-zeros in the diagonal and off-diagonal
3637         portions of the matrix in order to do correct preallocation
3638     */
3639 
3640     /* first get start and end of "diagonal" columns */
3641     PetscCall(ISGetLocalSize(iscol, &csize));
3642     if (csize == PETSC_DECIDE) {
3643       PetscCall(ISGetSize(isrow, &mglobal));
3644       if (mglobal == Ncols) { /* square matrix */
3645         nlocal = m;
3646       } else {
3647         nlocal = Ncols / size + ((Ncols % size) > rank);
3648       }
3649     } else {
3650       nlocal = csize;
3651     }
3652     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3653     rstart = rend - nlocal;
3654     PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);
3655 
3656     /* next, compute all the lengths */
3657     jj = aij->j;
3658     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3659     olens = dlens + m;
3660     for (i = 0; i < m; i++) {
3661       jend = ii[i + 1] - ii[i];
3662       olen = 0;
3663       dlen = 0;
3664       for (j = 0; j < jend; j++) {
3665         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3666         else dlen++;
3667         jj++;
3668       }
3669       olens[i] = olen;
3670       dlens[i] = dlen;
3671     }
3672 
3673     PetscCall(ISGetBlockSize(isrow, &bs));
3674     PetscCall(ISGetBlockSize(iscol, &cbs));
3675 
3676     PetscCall(MatCreate(comm, &M));
3677     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
3678     PetscCall(MatSetBlockSizes(M, bs, cbs));
3679     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3680     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3681     PetscCall(PetscFree(dlens));
3682 
3683   } else { /* call == MAT_REUSE_MATRIX */
3684     M = *newmat;
3685     PetscCall(MatGetLocalSize(M, &i, NULL));
3686     PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3687     PetscCall(MatZeroEntries(M));
3688     /*
3689          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3690        rather than the slower MatSetValues().
3691     */
3692     M->was_assembled = PETSC_TRUE;
3693     M->assembled     = PETSC_FALSE;
3694   }
3695 
3696   /* (5) Set values of Msub to *newmat */
3697   PetscCall(PetscMalloc1(count, &colsub));
3698   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
3699 
3700   jj = aij->j;
3701   PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
3702   for (i = 0; i < m; i++) {
3703     row = rstart + i;
3704     nz  = ii[i + 1] - ii[i];
3705     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
3706     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
3707     jj += nz;
3708     aa += nz;
3709   }
3710   PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
3711   PetscCall(ISRestoreIndices(iscmap, &cmap));
3712 
3713   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3714   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3715 
3716   PetscCall(PetscFree(colsub));
3717 
3718   /* save Msub, iscol_sub and iscmap used in processor for next request */
3719   if (call == MAT_INITIAL_MATRIX) {
3720     *newmat = M;
3721     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub));
3722     PetscCall(MatDestroy(&Msub));
3723 
3724     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub));
3725     PetscCall(ISDestroy(&iscol_sub));
3726 
3727     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap));
3728     PetscCall(ISDestroy(&iscmap));
3729 
3730     if (iscol_local) {
3731       PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local));
3732       PetscCall(ISDestroy(&iscol_local));
3733     }
3734   }
3735   PetscFunctionReturn(PETSC_SUCCESS);
3736 }
3737 
3738 /*
    Not great since it makes two copies of the submatrix, first a SeqAIJ
  on each process and then the final result by concatenating the local matrices.
3741   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3742 
3743   This requires a sequential iscol with all indices.
3744 */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns.
     All ranks must agree (MPI_LAND reduction) before the fast path is taken */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    /* the sequential submatrix from the previous MAT_INITIAL_MATRIX call was
       composed onto *newmat under the key "SubMatrix"; recover and refill it */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* spread the columns as evenly as possible; the first (n % size) ranks get one extra */
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* inclusive prefix sum gives this rank's end of the owned column range */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths: dlens[i]/olens[i] count entries of row i that
       fall inside/outside the [rstart,rend) "diagonal" column range */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation; only dlens is freed */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++; /* jj walks the whole column-index array across rows */
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  /* insert the sequential submatrix rows into the parallel matrix, one row at a time;
     cwork/vwork point into Mreuse's CSR arrays, so no per-row copies are made */
  for (i = 0; i < m; i++) {
    row   = rstart + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj += nz;
    vwork = aa;
    aa += nz;
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request; composing transfers
     the reference, so the local handle can be destroyed immediately */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3871 
3872 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3873 {
3874   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3875   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
3876   const PetscInt *JJ;
3877   PetscBool       nooffprocentries;
3878   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3879 
3880   PetscFunctionBegin;
3881   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);
3882 
3883   PetscCall(PetscLayoutSetUp(B->rmap));
3884   PetscCall(PetscLayoutSetUp(B->cmap));
3885   m      = B->rmap->n;
3886   cstart = B->cmap->rstart;
3887   cend   = B->cmap->rend;
3888   rstart = B->rmap->rstart;
3889 
3890   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3891 
3892   if (PetscDefined(USE_DEBUG)) {
3893     for (i = 0; i < m; i++) {
3894       nnz = Ii[i + 1] - Ii[i];
3895       JJ  = J + Ii[i];
3896       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3897       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3898       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3899     }
3900   }
3901 
3902   for (i = 0; i < m; i++) {
3903     nnz     = Ii[i + 1] - Ii[i];
3904     JJ      = J + Ii[i];
3905     nnz_max = PetscMax(nnz_max, nnz);
3906     d       = 0;
3907     for (j = 0; j < nnz; j++) {
3908       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3909     }
3910     d_nnz[i] = d;
3911     o_nnz[i] = nnz - d;
3912   }
3913   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3914   PetscCall(PetscFree2(d_nnz, o_nnz));
3915 
3916   for (i = 0; i < m; i++) {
3917     ii = i + rstart;
3918     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J + Ii[i], v ? v + Ii[i] : NULL, INSERT_VALUES));
3919   }
3920   nooffprocentries    = B->nooffprocentries;
3921   B->nooffprocentries = PETSC_TRUE;
3922   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3923   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3924   B->nooffprocentries = nooffprocentries;
3925 
3926   /* count number of entries below block diagonal */
3927   PetscCall(PetscFree(Aij->ld));
3928   PetscCall(PetscCalloc1(m, &ld));
3929   Aij->ld = ld;
3930   for (i = 0; i < m; i++) {
3931     nnz = Ii[i + 1] - Ii[i];
3932     j   = 0;
3933     while (j < nnz && J[j] < cstart) j++;
3934     ld[i] = j;
3935     J += nnz;
3936   }
3937 
3938   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3939   PetscFunctionReturn(PETSC_SUCCESS);
3940 }
3941 
3942 /*@
3943   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3944   (the default parallel PETSc format).
3945 
3946   Collective
3947 
3948   Input Parameters:
3949 + B - the matrix
3950 . i - the indices into j for the start of each local row (starts with zero)
3951 . j - the column indices for each local row (starts with zero)
3952 - v - optional values in the matrix
3953 
3954   Level: developer
3955 
3956   Notes:
3957   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
3958   thus you CANNOT change the matrix entries by changing the values of `v` after you have
3959   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3960 
3961   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
3962 
3963   The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering, i.e., for the following matrix, the input data expected is
3965   as shown
3966 
3967 .vb
3968         1 0 0
3969         2 0 3     P0
3970        -------
3971         4 5 6     P1
3972 
3973      Process0 [P0] rows_owned=[0,1]
3974         i =  {0,1,3}  [size = nrow+1  = 2+1]
3975         j =  {0,0,2}  [size = 3]
3976         v =  {1,2,3}  [size = 3]
3977 
3978      Process1 [P1] rows_owned=[2]
3979         i =  {0,3}    [size = nrow+1  = 1+1]
3980         j =  {0,1,2}  [size = 3]
3981         v =  {4,5,6}  [size = 3]
3982 .ve
3983 
3984 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
3985           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
3986 @*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation composed as "MatMPIAIJSetPreallocationCSR_C"
     (e.g. MatMPIAIJSetPreallocationCSR_MPIAIJ); PetscTryMethod() is a no-op if B's type
     does not provide it */
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3993 
3994 /*@C
3995   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
3996   (the default parallel PETSc format).  For good matrix assembly performance
3997   the user should preallocate the matrix storage by setting the parameters
3998   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
3999 
4000   Collective
4001 
4002   Input Parameters:
4003 + B     - the matrix
4004 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4005            (same value is used for all local rows)
4006 . d_nnz - array containing the number of nonzeros in the various rows of the
4007            DIAGONAL portion of the local submatrix (possibly different for each row)
4008            or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4009            The size of this array is equal to the number of local rows, i.e 'm'.
4010            For matrices that will be factored, you must leave room for (and set)
4011            the diagonal entry even if it is zero.
4012 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4013            submatrix (same value is used for all local rows).
4014 - o_nnz - array containing the number of nonzeros in the various rows of the
4015            OFF-DIAGONAL portion of the local submatrix (possibly different for
4016            each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4017            structure. The size of this array is equal to the number
4018            of local rows, i.e 'm'.
4019 
4020   Example Usage:
4021   Consider the following 8x8 matrix with 34 non-zero values, that is
4022   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4023   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4024   as follows
4025 
4026 .vb
4027             1  2  0  |  0  3  0  |  0  4
4028     Proc0   0  5  6  |  7  0  0  |  8  0
4029             9  0 10  | 11  0  0  | 12  0
4030     -------------------------------------
4031            13  0 14  | 15 16 17  |  0  0
4032     Proc1   0 18  0  | 19 20 21  |  0  0
4033             0  0  0  | 22 23  0  | 24  0
4034     -------------------------------------
4035     Proc2  25 26 27  |  0  0 28  | 29  0
4036            30  0  0  | 31 32 33  |  0 34
4037 .ve
4038 
4039   This can be represented as a collection of submatrices as
4040 .vb
4041       A B C
4042       D E F
4043       G H I
4044 .ve
4045 
4046   Where the submatrices A,B,C are owned by proc0, D,E,F are
4047   owned by proc1, G,H,I are owned by proc2.
4048 
4049   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4050   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4051   The 'M','N' parameters are 8,8, and have the same values on all procs.
4052 
4053   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4054   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4055   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4056   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4057   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.
4059 
4060   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4061   allocated for every row of the local diagonal submatrix, and `o_nz`
4062   storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
4064   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4065   In this case, the values of `d_nz`, `o_nz` are
4066 .vb
4067      proc0  dnz = 2, o_nz = 2
4068      proc1  dnz = 3, o_nz = 2
4069      proc2  dnz = 1, o_nz = 4
4070 .ve
4071   We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
4072   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2, i.e., we are using 12+15+10=37 storage locations to store
4074   34 values.
4075 
4076   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4077   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4078   In the above case the values for `d_nnz`, `o_nnz` are
4079 .vb
4080      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4081      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4082      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4083 .ve
4084   Here the space allocated is sum of all the above values i.e 34, and
4085   hence pre-allocation is perfect.
4086 
4087   Level: intermediate
4088 
4089   Notes:
4090   If the *_nnz parameter is given then the *_nz parameter is ignored
4091 
4092   The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4093   storage.  The stored row and column indices begin with zero.
4094   See [Sparse Matrices](sec_matsparse) for details.
4095 
4096   The parallel matrix is partitioned such that the first m0 rows belong to
4097   process 0, the next m1 rows belong to process 1, the next m2 rows belong
4098   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4099 
4100   The DIAGONAL portion of the local submatrix of a processor can be defined
4101   as the submatrix which is obtained by extraction the part corresponding to
4102   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4103   first row that belongs to the processor, r2 is the last row belonging to
4104   the this processor, and c1-c2 is range of indices of the local part of a
4105   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4106   common case of a square matrix, the row and column ranges are the same and
4107   the DIAGONAL part is also square. The remaining portion of the local
4108   submatrix (mxN) constitute the OFF-DIAGONAL portion.
4109 
4110   If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4111 
4112   You can call `MatGetInfo()` to get information on how effective the preallocation was;
4113   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4114   You can also run with the option `-info` and look for messages with the string
4115   malloc in them to see if additional memory allocation was needed.
4116 
4117 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4118           `MatGetInfo()`, `PetscSplitOwnership()`
4119 @*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* dispatch to the type-specific implementation composed as "MatMPIAIJSetPreallocation_C";
     PetscTryMethod() is a no-op if B's type does not provide it */
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4128 
4129 /*@
4130   MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard
4131   CSR format for the local rows.
4132 
4133   Collective
4134 
4135   Input Parameters:
4136 + comm - MPI communicator
4137 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
4138 . n    - This value should be the same as the local size used in creating the
4139        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4140        calculated if N is given) For square matrices n is almost always m.
4141 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4142 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4143 . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4144 . j    - column indices
4145 - a    - optional matrix values
4146 
4147   Output Parameter:
4148 . mat - the matrix
4149 
4150   Level: intermediate
4151 
4152   Notes:
4153   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4154   thus you CANNOT change the matrix entries by changing the values of a[] after you have
4155   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4156 
4157   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
4158 
4159   The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering, i.e., for the following matrix, the input data expected is
4161   as shown
4162 
4163   Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
4164 .vb
4165         1 0 0
4166         2 0 3     P0
4167        -------
4168         4 5 6     P1
4169 
4170      Process0 [P0] rows_owned=[0,1]
4171         i =  {0,1,3}  [size = nrow+1  = 2+1]
4172         j =  {0,0,2}  [size = 3]
4173         v =  {1,2,3}  [size = 3]
4174 
4175      Process1 [P1] rows_owned=[2]
4176         i =  {0,3}    [size = nrow+1  = 1+1]
4177         j =  {0,1,2}  [size = 3]
4178         v =  {4,5,6}  [size = 3]
4179 .ve
4180 
.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4183 @*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  /* i[] holds each rank's local row offsets, so it must begin at 0 */
  PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  /* copies the CSR data into the matrix; the caller's arrays are not referenced afterwards */
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4196 
4197 /*@
4198   MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard
4199   CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed
4200   from `MatCreateMPIAIJWithArrays()`
4201 
4202   Deprecated: Use `MatUpdateMPIAIJWithArray()`
4203 
4204   Collective
4205 
4206   Input Parameters:
4207 + mat - the matrix
4208 . m   - number of local rows (Cannot be `PETSC_DECIDE`)
4209 . n   - This value should be the same as the local size used in creating the
4210        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4211        calculated if N is given) For square matrices n is almost always m.
4212 . M   - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4213 . N   - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4214 . Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4215 . J   - column indices
4216 - v   - matrix values
4217 
4218   Level: deprecated
4219 
4220 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4221           `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`
4222 @*/
4223 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
4224 {
4225   PetscInt        nnz, i;
4226   PetscBool       nooffprocentries;
4227   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4228   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4229   PetscScalar    *ad, *ao;
4230   PetscInt        ldi, Iii, md;
4231   const PetscInt *Adi = Ad->i;
4232   PetscInt       *ld  = Aij->ld;
4233 
4234   PetscFunctionBegin;
4235   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4236   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4237   PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4238   PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4239 
4240   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4241   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4242 
4243   for (i = 0; i < m; i++) {
4244     nnz = Ii[i + 1] - Ii[i];
4245     Iii = Ii[i];
4246     ldi = ld[i];
4247     md  = Adi[i + 1] - Adi[i];
4248     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4249     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4250     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4251     ad += md;
4252     ao += nnz - md;
4253   }
4254   nooffprocentries      = mat->nooffprocentries;
4255   mat->nooffprocentries = PETSC_TRUE;
4256   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4257   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4258   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4259   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4260   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4261   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4262   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4263   mat->nooffprocentries = nooffprocentries;
4264   PetscFunctionReturn(PETSC_SUCCESS);
4265 }
4266 
4267 /*@
4268   MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values
4269 
4270   Collective
4271 
4272   Input Parameters:
4273 + mat - the matrix
4274 - v   - matrix values, stored by row
4275 
4276   Level: intermediate
4277 
4278   Note:
4279   The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4280 
4281 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4282           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4283 @*/
4284 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4285 {
4286   PetscInt        nnz, i, m;
4287   PetscBool       nooffprocentries;
4288   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4289   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4290   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4291   PetscScalar    *ad, *ao;
4292   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4293   PetscInt        ldi, Iii, md;
4294   PetscInt       *ld = Aij->ld;
4295 
4296   PetscFunctionBegin;
4297   m = mat->rmap->n;
4298 
4299   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4300   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4301   Iii = 0;
4302   for (i = 0; i < m; i++) {
4303     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4304     ldi = ld[i];
4305     md  = Adi[i + 1] - Adi[i];
4306     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4307     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4308     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4309     ad += md;
4310     ao += nnz - md;
4311     Iii += nnz;
4312   }
4313   nooffprocentries      = mat->nooffprocentries;
4314   mat->nooffprocentries = PETSC_TRUE;
4315   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4316   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4317   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4318   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4319   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4320   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4321   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4322   mat->nooffprocentries = nooffprocentries;
4323   PetscFunctionReturn(PETSC_SUCCESS);
4324 }
4325 
4326 /*@C
4327   MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4328   (the default parallel PETSc format).  For good matrix assembly performance
4329   the user should preallocate the matrix storage by setting the parameters
4330   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4331 
4332   Collective
4333 
4334   Input Parameters:
4335 + comm  - MPI communicator
4336 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4337            This value should be the same as the local size used in creating the
4338            y vector for the matrix-vector product y = Ax.
4339 . n     - This value should be the same as the local size used in creating the
4340        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4341        calculated if N is given) For square matrices n is almost always m.
4342 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4343 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4344 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4345            (same value is used for all local rows)
4346 . d_nnz - array containing the number of nonzeros in the various rows of the
4347            DIAGONAL portion of the local submatrix (possibly different for each row)
4348            or `NULL`, if `d_nz` is used to specify the nonzero structure.
4349            The size of this array is equal to the number of local rows, i.e 'm'.
4350 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4351            submatrix (same value is used for all local rows).
4352 - o_nnz - array containing the number of nonzeros in the various rows of the
4353            OFF-DIAGONAL portion of the local submatrix (possibly different for
4354            each row) or `NULL`, if `o_nz` is used to specify the nonzero
4355            structure. The size of this array is equal to the number
4356            of local rows, i.e 'm'.
4357 
4358   Output Parameter:
4359 . A - the matrix
4360 
4361   Options Database Keys:
4362 + -mat_no_inode                     - Do not use inodes
4363 . -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
4364 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4365         See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
4366         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4367 
4368   Level: intermediate
4369 
4370   Notes:
4371   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4372   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4373   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4374 
4375   If the *_nnz parameter is given then the *_nz parameter is ignored
4376 
4377   The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4378   processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4379   storage requirements for this matrix.
4380 
4381   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one
4382   processor than it must be used on all processors that share the object for
4383   that argument.
4384 
4385   The user MUST specify either the local or global matrix dimensions
4386   (possibly both).
4387 
4388   The parallel matrix is partitioned across processors such that the
4389   first m0 rows belong to process 0, the next m1 rows belong to
4390   process 1, the next m2 rows belong to process 2 etc.. where
4391   m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4392   values corresponding to [m x N] submatrix.
4393 
4394   The columns are logically partitioned with the n0 columns belonging
4395   to 0th partition, the next n1 columns belonging to the next
4396   partition etc.. where n0,n1,n2... are the input parameter 'n'.
4397 
4398   The DIAGONAL portion of the local submatrix on any given processor
4399   is the submatrix corresponding to the rows and columns m,n
4400   corresponding to the given processor. i.e diagonal matrix on
4401   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4402   etc. The remaining portion of the local submatrix [m x (N-n)]
4403   constitute the OFF-DIAGONAL portion. The example below better
4404   illustrates this concept.
4405 
4406   For a square global matrix we define each processor's diagonal portion
4407   to be its local rows and the corresponding columns (a square submatrix);
4408   each processor's off-diagonal portion encompasses the remainder of the
4409   local matrix (a rectangular submatrix).
4410 
4411   If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
4412 
4413   When calling this routine with a single process communicator, a matrix of
4414   type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4415   type of communicator, use the construction mechanism
4416 .vb
4417   MatCreate(..., &A);
4418   MatSetType(A, MATMPIAIJ);
4419   MatSetSizes(A, m, n, M, N);
4420   MatMPIAIJSetPreallocation(A, ...);
4421 .ve
4422 
4423   By default, this format uses inodes (identical nodes) when possible.
4424   We search for consecutive rows with the same nonzero structure, thereby
4425   reusing matrix information to achieve increased efficiency.
4426 
4427   Example Usage:
4428   Consider the following 8x8 matrix with 34 non-zero values, that is
4429   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4430   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4431   as follows
4432 
4433 .vb
4434             1  2  0  |  0  3  0  |  0  4
4435     Proc0   0  5  6  |  7  0  0  |  8  0
4436             9  0 10  | 11  0  0  | 12  0
4437     -------------------------------------
4438            13  0 14  | 15 16 17  |  0  0
4439     Proc1   0 18  0  | 19 20 21  |  0  0
4440             0  0  0  | 22 23  0  | 24  0
4441     -------------------------------------
4442     Proc2  25 26 27  |  0  0 28  | 29  0
4443            30  0  0  | 31 32 33  |  0 34
4444 .ve
4445 
4446   This can be represented as a collection of submatrices as
4447 
4448 .vb
4449       A B C
4450       D E F
4451       G H I
4452 .ve
4453 
4454   Where the submatrices A,B,C are owned by proc0, D,E,F are
4455   owned by proc1, G,H,I are owned by proc2.
4456 
4457   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4458   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4459   The 'M','N' parameters are 8,8, and have the same values on all procs.
4460 
4461   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4462   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4463   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4464   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4465   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another SeqAIJ matrix.
4467 
4468   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4469   allocated for every row of the local diagonal submatrix, and `o_nz`
4470   storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
4472   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4473   In this case, the values of `d_nz`,`o_nz` are
4474 .vb
4475      proc0  dnz = 2, o_nz = 2
4476      proc1  dnz = 3, o_nz = 2
4477      proc2  dnz = 1, o_nz = 4
4478 .ve
4479   We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
4480   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2. i.e we are using 12+15+10=37 storage locations to store
4482   34 values.
4483 
4484   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4485   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4486   In the above case the values for d_nnz,o_nnz are
4487 .vb
4488      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4489      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4490      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4491 .ve
4492   Here the space allocated is sum of all the above values i.e 34, and
4493   hence pre-allocation is perfect.
4494 
4495 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4496           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
4497 @*/
4498 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4499 {
4500   PetscMPIInt size;
4501 
4502   PetscFunctionBegin;
4503   PetscCall(MatCreate(comm, A));
4504   PetscCall(MatSetSizes(*A, m, n, M, N));
4505   PetscCallMPI(MPI_Comm_size(comm, &size));
4506   if (size > 1) {
4507     PetscCall(MatSetType(*A, MATMPIAIJ));
4508     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4509   } else {
4510     PetscCall(MatSetType(*A, MATSEQAIJ));
4511     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4512   }
4513   PetscFunctionReturn(PETSC_SUCCESS);
4514 }
4515 
4516 /*MC
4517     MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix
4518 
4519     Synopsis:
4520     MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4521 
4522     Not Collective
4523 
4524     Input Parameter:
4525 .   A - the `MATMPIAIJ` matrix
4526 
4527     Output Parameters:
4528 +   Ad - the diagonal portion of the matrix
4529 .   Ao - the off diagonal portion of the matrix
4530 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4531 -   ierr - error code
4532 
4533      Level: advanced
4534 
4535     Note:
4536     Use  `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4537 
4538 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
4539 M*/
4540 
4541 /*MC
4542     MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4543 
4544     Synopsis:
4545     MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4546 
4547     Not Collective
4548 
4549     Input Parameters:
4550 +   A - the `MATMPIAIJ` matrix
4551 .   Ad - the diagonal portion of the matrix
4552 .   Ao - the off diagonal portion of the matrix
4553 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4554 -   ierr - error code
4555 
4556      Level: advanced
4557 
4558 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
4559 M*/
4560 
4561 /*@C
4562   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4563 
4564   Not Collective
4565 
4566   Input Parameter:
4567 . A - The `MATMPIAIJ` matrix
4568 
4569   Output Parameters:
4570 + Ad     - The local diagonal block as a `MATSEQAIJ` matrix
4571 . Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
4572 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4573 
4574   Level: intermediate
4575 
4576   Note:
4577   The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns in `Ao` are in [0, Nco), where Nco is
4579   the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
4580   local column numbers to global column numbers in the original matrix.
4581 
4582   Fortran Notes:
4583   `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()`
4584 
4585 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
4586 @*/
4587 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4588 {
4589   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4590   PetscBool   flg;
4591 
4592   PetscFunctionBegin;
4593   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4594   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4595   if (Ad) *Ad = a->A;
4596   if (Ao) *Ao = a->B;
4597   if (colmap) *colmap = a->garray;
4598   PetscFunctionReturn(PETSC_SUCCESS);
4599 }
4600 
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  PetscInt     m, N, i, rstart, nnz, Ii;
  PetscInt    *indx;
  PetscScalar *values;
  MatType      rootType;

  PetscFunctionBegin;
  /* Stack the sequential matrices inmat (one per process of comm) on top of each
     other into the parallel matrix *outmat; this process contributes its m rows
     starting at global row rstart. n is the local column count (or PETSC_DECIDE). */
  PetscCall(MatGetSize(inmat, &m, &N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz, *onz, sum, bs, cbs;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    /* exclusive prefix sum of m gives this process's first global row */
    PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
    rstart -= m;

    /* count diagonal/off-diagonal nonzeros per row for preallocation */
    MatPreallocateBegin(comm, m, n, dnz, onz);
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
      PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
    }

    PetscCall(MatCreate(comm, outmat));
    PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
    PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
    PetscCall(MatGetRootType_Private(inmat, &rootType));
    PetscCall(MatSetType(*outmat, rootType));
    /* only the preallocation matching *outmat's actual type takes effect; the other is a no-op */
    PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
    MatPreallocateEnd(dnz, onz);
    /* every process sets only its own rows below, so the stash can be skipped */
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase: copy each local row of inmat into its global row of *outmat */
  PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
    Ii = i + rstart;
    PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
  }
  PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4652 
4653 PetscErrorCode MatFileSplit(Mat A, char *outfile)
4654 {
4655   PetscMPIInt        rank;
4656   PetscInt           m, N, i, rstart, nnz;
4657   size_t             len;
4658   const PetscInt    *indx;
4659   PetscViewer        out;
4660   char              *name;
4661   Mat                B;
4662   const PetscScalar *values;
4663 
4664   PetscFunctionBegin;
4665   PetscCall(MatGetLocalSize(A, &m, NULL));
4666   PetscCall(MatGetSize(A, NULL, &N));
4667   /* Should this be the type of the diagonal block of A? */
4668   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
4669   PetscCall(MatSetSizes(B, m, N, m, N));
4670   PetscCall(MatSetBlockSizesFromMats(B, A, A));
4671   PetscCall(MatSetType(B, MATSEQAIJ));
4672   PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
4673   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
4674   for (i = 0; i < m; i++) {
4675     PetscCall(MatGetRow(A, i + rstart, &nnz, &indx, &values));
4676     PetscCall(MatSetValues(B, 1, &i, nnz, indx, values, INSERT_VALUES));
4677     PetscCall(MatRestoreRow(A, i + rstart, &nnz, &indx, &values));
4678   }
4679   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
4680   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
4681 
4682   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
4683   PetscCall(PetscStrlen(outfile, &len));
4684   PetscCall(PetscMalloc1(len + 6, &name));
4685   PetscCall(PetscSNPrintf(name, len + 6, "%s.%d", outfile, rank));
4686   PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, name, FILE_MODE_APPEND, &out));
4687   PetscCall(PetscFree(name));
4688   PetscCall(MatView(B, out));
4689   PetscCall(PetscViewerDestroy(&out));
4690   PetscCall(MatDestroy(&B));
4691   PetscFunctionReturn(PETSC_SUCCESS);
4692 }
4693 
static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
{
  Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;

  PetscFunctionBegin;
  /* Destructor for the "MatMergeSeqsToMPI" merge context queried by
     MatCreateMPIAIJSumSeqAIJNumeric(); a NULL context is a no-op. */
  if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscFree(merge->id_r));
  PetscCall(PetscFree(merge->len_s));
  PetscCall(PetscFree(merge->len_r));
  PetscCall(PetscFree(merge->bi));
  PetscCall(PetscFree(merge->bj));
  /* buf_ri/buf_rj are pointer arrays whose payload is a single contiguous
     allocation anchored at entry 0, hence the [0] frees before the arrays */
  PetscCall(PetscFree(merge->buf_ri[0]));
  PetscCall(PetscFree(merge->buf_ri));
  PetscCall(PetscFree(merge->buf_rj[0]));
  PetscCall(PetscFree(merge->buf_rj));
  PetscCall(PetscFree(merge->coi));
  PetscCall(PetscFree(merge->coj));
  PetscCall(PetscFree(merge->owners_co));
  PetscCall(PetscLayoutDestroy(&merge->rowmap));
  PetscCall(PetscFree(merge));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4716 
4717 #include <../src/mat/utils/freespace.h>
4718 #include <petscbt.h>
4719 
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
{
  MPI_Comm             comm;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, taga, *len_s;
  PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj;
  PetscInt             proc, m;
  PetscInt           **buf_ri, **buf_rj;
  PetscInt             k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
  PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
  MPI_Request         *s_waits, *r_waits;
  MPI_Status          *status;
  const MatScalar     *aa, *a_a;
  MatScalar          **abuf_r, *ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  /* Numeric phase: fill mpimat with the sum of the per-process sequential
     matrices seqmat. The nonzero structure (bi/bj) and the communication
     pattern were computed by MatCreateMPIAIJSumSeqAIJSymbolic() and are
     retrieved from the "MatMergeSeqsToMPI" container attached to mpimat. */
  PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));

  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container, (void **)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
  aa = a_a; /* aa is a moving cursor; a_a keeps the base pointer for the restore */

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  PetscCall(PetscMalloc1(size, &status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
  PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));

  PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    /* rows owned by [proc] are contiguous in seqmat, starting at value offset ai[owners[proc]] */
    i = owners[proc];
    PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  PetscCall(PetscMalloc1(N, &ba_i)); /* dense accumulator, large enough for any row (<= N entries) */
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i = 0; i < m; i++) {
    arow = owners[rank] + i;
    bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
    bnzi = bi[i + 1] - bi[i];
    PetscCall(PetscArrayzero(ba_i, bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow + 1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* sorted merge; assumes the symbolic phase made aj a subset of bj_i */
    for (j = 0; nextaj < anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k] + 1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j = 0; nextaj < anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++;
        nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
  PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));

  /* abuf_r's payload is one contiguous allocation anchored at entry 0 */
  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4838 
4839 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4840 {
4841   Mat                  B_mpi;
4842   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4843   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4844   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4845   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4846   PetscInt             len, proc, *dnz, *onz, bs, cbs;
4847   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4848   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4849   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4850   MPI_Status          *status;
4851   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4852   PetscBT              lnkbt;
4853   Mat_Merge_SeqsToMPI *merge;
4854   PetscContainer       container;
4855 
4856   PetscFunctionBegin;
4857   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4858 
4859   /* make sure it is a PETSc comm */
4860   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4861   PetscCallMPI(MPI_Comm_size(comm, &size));
4862   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4863 
4864   PetscCall(PetscNew(&merge));
4865   PetscCall(PetscMalloc1(size, &status));
4866 
4867   /* determine row ownership */
4868   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4869   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4870   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4871   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4872   PetscCall(PetscLayoutSetUp(merge->rowmap));
4873   PetscCall(PetscMalloc1(size, &len_si));
4874   PetscCall(PetscMalloc1(size, &merge->len_s));
4875 
4876   m      = merge->rowmap->n;
4877   owners = merge->rowmap->range;
4878 
4879   /* determine the number of messages to send, their lengths */
4880   len_s = merge->len_s;
4881 
4882   len          = 0; /* length of buf_si[] */
4883   merge->nsend = 0;
4884   for (proc = 0; proc < size; proc++) {
4885     len_si[proc] = 0;
4886     if (proc == rank) {
4887       len_s[proc] = 0;
4888     } else {
4889       len_si[proc] = owners[proc + 1] - owners[proc] + 1;
4890       len_s[proc]  = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4891     }
4892     if (len_s[proc]) {
4893       merge->nsend++;
4894       nrows = 0;
4895       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4896         if (ai[i + 1] > ai[i]) nrows++;
4897       }
4898       len_si[proc] = 2 * (nrows + 1);
4899       len += len_si[proc];
4900     }
4901   }
4902 
4903   /* determine the number and length of messages to receive for ij-structure */
4904   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4905   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4906 
4907   /* post the Irecv of j-structure */
4908   PetscCall(PetscCommGetNewTag(comm, &tagj));
4909   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4910 
4911   /* post the Isend of j-structure */
4912   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4913 
4914   for (proc = 0, k = 0; proc < size; proc++) {
4915     if (!len_s[proc]) continue;
4916     i = owners[proc];
4917     PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4918     k++;
4919   }
4920 
4921   /* receives and sends of j-structure are complete */
4922   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4923   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4924 
4925   /* send and recv i-structure */
4926   PetscCall(PetscCommGetNewTag(comm, &tagi));
4927   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4928 
4929   PetscCall(PetscMalloc1(len + 1, &buf_s));
4930   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4931   for (proc = 0, k = 0; proc < size; proc++) {
4932     if (!len_s[proc]) continue;
4933     /* form outgoing message for i-structure:
4934          buf_si[0]:                 nrows to be sent
4935                [1:nrows]:           row index (global)
4936                [nrows+1:2*nrows+1]: i-structure index
4937     */
4938     nrows       = len_si[proc] / 2 - 1;
4939     buf_si_i    = buf_si + nrows + 1;
4940     buf_si[0]   = nrows;
4941     buf_si_i[0] = 0;
4942     nrows       = 0;
4943     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4944       anzi = ai[i + 1] - ai[i];
4945       if (anzi) {
4946         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4947         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4948         nrows++;
4949       }
4950     }
4951     PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4952     k++;
4953     buf_si += len_si[proc];
4954   }
4955 
4956   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4957   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4958 
4959   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4960   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4961 
4962   PetscCall(PetscFree(len_si));
4963   PetscCall(PetscFree(len_ri));
4964   PetscCall(PetscFree(rj_waits));
4965   PetscCall(PetscFree2(si_waits, sj_waits));
4966   PetscCall(PetscFree(ri_waits));
4967   PetscCall(PetscFree(buf_s));
4968   PetscCall(PetscFree(status));
4969 
4970   /* compute a local seq matrix in each processor */
4971   /* allocate bi array and free space for accumulating nonzero column info */
4972   PetscCall(PetscMalloc1(m + 1, &bi));
4973   bi[0] = 0;
4974 
4975   /* create and initialize a linked list */
4976   nlnk = N + 1;
4977   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
4978 
4979   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4980   len = ai[owners[rank + 1]] - ai[owners[rank]];
4981   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
4982 
4983   current_space = free_space;
4984 
4985   /* determine symbolic info for each local row */
4986   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4987 
4988   for (k = 0; k < merge->nrecv; k++) {
4989     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4990     nrows       = *buf_ri_k[k];
4991     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4992     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4993   }
4994 
4995   MatPreallocateBegin(comm, m, n, dnz, onz);
4996   len = 0;
4997   for (i = 0; i < m; i++) {
4998     bnzi = 0;
4999     /* add local non-zero cols of this proc's seqmat into lnk */
5000     arow = owners[rank] + i;
5001     anzi = ai[arow + 1] - ai[arow];
5002     aj   = a->j + ai[arow];
5003     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5004     bnzi += nlnk;
5005     /* add received col data into lnk */
5006     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
5007       if (i == *nextrow[k]) {            /* i-th row */
5008         anzi = *(nextai[k] + 1) - *nextai[k];
5009         aj   = buf_rj[k] + *nextai[k];
5010         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5011         bnzi += nlnk;
5012         nextrow[k]++;
5013         nextai[k]++;
5014       }
5015     }
5016     if (len < bnzi) len = bnzi; /* =max(bnzi) */
5017 
5018     /* if free space is not available, make more free space */
5019     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
5020     /* copy data into free space, then initialize lnk */
5021     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
5022     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
5023 
5024     current_space->array += bnzi;
5025     current_space->local_used += bnzi;
5026     current_space->local_remaining -= bnzi;
5027 
5028     bi[i + 1] = bi[i] + bnzi;
5029   }
5030 
5031   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
5032 
5033   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
5034   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
5035   PetscCall(PetscLLDestroy(lnk, lnkbt));
5036 
5037   /* create symbolic parallel matrix B_mpi */
5038   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
5039   PetscCall(MatCreate(comm, &B_mpi));
5040   if (n == PETSC_DECIDE) {
5041     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
5042   } else {
5043     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
5044   }
5045   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
5046   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
5047   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5048   MatPreallocateEnd(dnz, onz);
5049   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
5050 
5051   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5052   B_mpi->assembled = PETSC_FALSE;
5053   merge->bi        = bi;
5054   merge->bj        = bj;
5055   merge->buf_ri    = buf_ri;
5056   merge->buf_rj    = buf_rj;
5057   merge->coi       = NULL;
5058   merge->coj       = NULL;
5059   merge->owners_co = NULL;
5060 
5061   PetscCall(PetscCommDestroy(&comm));
5062 
5063   /* attach the supporting struct to B_mpi for reuse */
5064   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5065   PetscCall(PetscContainerSetPointer(container, merge));
5066   PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5067   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5068   PetscCall(PetscContainerDestroy(&container));
5069   *mpimat = B_mpi;
5070 
5071   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5072   PetscFunctionReturn(PETSC_SUCCESS);
5073 }
5074 
5075 /*@C
5076   MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5077   matrices from each processor
5078 
5079   Collective
5080 
5081   Input Parameters:
5082 + comm   - the communicators the parallel matrix will live on
5083 . seqmat - the input sequential matrices
5084 . m      - number of local rows (or `PETSC_DECIDE`)
5085 . n      - number of local columns (or `PETSC_DECIDE`)
5086 - scall  - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5087 
5088   Output Parameter:
5089 . mpimat - the parallel matrix generated
5090 
5091   Level: advanced
5092 
5093   Note:
5094   The dimensions of the sequential matrix in each processor MUST be the same.
5095   The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5096   destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat.
5097 
5098 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
5099 @*/
5100 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5101 {
5102   PetscMPIInt size;
5103 
5104   PetscFunctionBegin;
5105   PetscCallMPI(MPI_Comm_size(comm, &size));
5106   if (size == 1) {
5107     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5108     if (scall == MAT_INITIAL_MATRIX) {
5109       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5110     } else {
5111       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5112     }
5113     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5114     PetscFunctionReturn(PETSC_SUCCESS);
5115   }
5116   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5117   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5118   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5119   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5120   PetscFunctionReturn(PETSC_SUCCESS);
5121 }
5122 
5123 /*@
5124   MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix.
5125 
5126   Not Collective
5127 
5128   Input Parameter:
5129 . A - the matrix
5130 
5131   Output Parameter:
5132 . A_loc - the local sequential matrix generated
5133 
5134   Level: developer
5135 
5136   Notes:
5137   The matrix is created by taking `A`'s local rows and putting them into a sequential matrix
5138   with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and
5139   `n` is the global column count obtained with `MatGetSize()`
5140 
5141   In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5142 
5143   For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.
5144 
5145   Destroy the matrix with `MatDestroy()`
5146 
5147 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5148 @*/
5149 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5150 {
5151   PetscBool mpi;
5152 
5153   PetscFunctionBegin;
5154   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5155   if (mpi) {
5156     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5157   } else {
5158     *A_loc = A;
5159     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5160   }
5161   PetscFunctionReturn(PETSC_SUCCESS);
5162 }
5163 
/*@
  MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.

  Not Collective

  Input Parameters:
+ A     - the matrix
- scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

  Output Parameter:
. A_loc - the local sequential matrix generated

  Level: developer

  Notes:
  The matrix is created by taking all `A`'s local rows and putting them into a sequential
  matrix with `mlocal` rows and `n` columns. `mlocal` is the row count obtained with
  `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.

  In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.

  When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
  with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
  then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
  and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray; /* cmap: local off-diag column -> global column */
  const PetscScalar *aa, *ba, *aav, *bav;
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  /* accept any type whose name begins with "mpiaij" (covers derived mpiaij types) */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* uniprocessor: the diagonal block mpimat->A already holds the entire matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  /* CSR structure of the diagonal (A) and off-diagonal (B) local blocks */
  a  = (Mat_SeqAIJ *)(mpimat->A)->data;
  b  = (Mat_SeqAIJ *)(mpimat->B)->data;
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  /* aa/ba are walking cursors over the value arrays; aav/bav keep the original
     pointers so the restore calls below hand back what was obtained */
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* row pointers of the merged matrix: each row holds its diag + off-diag entries */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal portion of A with global column < cstart; splitting the
         off-diag entries around the diag block keeps each row's global columns
         sorted (assumes cmap/garray is sorted ascending — the usual invariant) */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A (global columns cstart .. cstart + ncols_d - 1) */
      for (j = 0; j < ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* remaining off-diagonal portion of A (global column >= cstart) */
      for (j = jo; j < ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* the nonzero pattern is assumed unchanged: only refill values, walking rows
       in the same off-diag/diag/off-diag order used at creation */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A preceding the diagonal block */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* remaining off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5296 
5297 /*@
5298   MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5299   mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part
5300 
5301   Not Collective
5302 
5303   Input Parameters:
5304 + A     - the matrix
5305 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5306 
5307   Output Parameters:
5308 + glob  - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5309 - A_loc - the local sequential matrix generated
5310 
5311   Level: developer
5312 
5313   Note:
5314   This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal
5315   part, then those associated with the off diagonal part (in its local ordering)
5316 
5317 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5318 @*/
5319 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5320 {
5321   Mat             Ao, Ad;
5322   const PetscInt *cmap;
5323   PetscMPIInt     size;
5324   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5325 
5326   PetscFunctionBegin;
5327   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5328   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5329   if (size == 1) {
5330     if (scall == MAT_INITIAL_MATRIX) {
5331       PetscCall(PetscObjectReference((PetscObject)Ad));
5332       *A_loc = Ad;
5333     } else if (scall == MAT_REUSE_MATRIX) {
5334       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5335     }
5336     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5337     PetscFunctionReturn(PETSC_SUCCESS);
5338   }
5339   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5340   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5341   if (f) {
5342     PetscCall((*f)(A, scall, glob, A_loc));
5343   } else {
5344     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5345     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5346     Mat_SeqAIJ        *c;
5347     PetscInt          *ai = a->i, *aj = a->j;
5348     PetscInt          *bi = b->i, *bj = b->j;
5349     PetscInt          *ci, *cj;
5350     const PetscScalar *aa, *ba;
5351     PetscScalar       *ca;
5352     PetscInt           i, j, am, dn, on;
5353 
5354     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5355     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5356     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5357     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5358     if (scall == MAT_INITIAL_MATRIX) {
5359       PetscInt k;
5360       PetscCall(PetscMalloc1(1 + am, &ci));
5361       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5362       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5363       ci[0] = 0;
5364       for (i = 0, k = 0; i < am; i++) {
5365         const PetscInt ncols_o = bi[i + 1] - bi[i];
5366         const PetscInt ncols_d = ai[i + 1] - ai[i];
5367         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5368         /* diagonal portion of A */
5369         for (j = 0; j < ncols_d; j++, k++) {
5370           cj[k] = *aj++;
5371           ca[k] = *aa++;
5372         }
5373         /* off-diagonal portion of A */
5374         for (j = 0; j < ncols_o; j++, k++) {
5375           cj[k] = dn + *bj++;
5376           ca[k] = *ba++;
5377         }
5378       }
5379       /* put together the new matrix */
5380       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5381       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5382       /* Since these are PETSc arrays, change flags to free them as necessary. */
5383       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5384       c->free_a  = PETSC_TRUE;
5385       c->free_ij = PETSC_TRUE;
5386       c->nonew   = 0;
5387       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5388     } else if (scall == MAT_REUSE_MATRIX) {
5389       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5390       for (i = 0; i < am; i++) {
5391         const PetscInt ncols_d = ai[i + 1] - ai[i];
5392         const PetscInt ncols_o = bi[i + 1] - bi[i];
5393         /* diagonal portion of A */
5394         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5395         /* off-diagonal portion of A */
5396         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5397       }
5398       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5399     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5400     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5401     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa));
5402     if (glob) {
5403       PetscInt cst, *gidx;
5404 
5405       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5406       PetscCall(PetscMalloc1(dn + on, &gidx));
5407       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5408       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5409       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5410     }
5411   }
5412   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5413   PetscFunctionReturn(PETSC_SUCCESS);
5414 }
5415 
5416 /*@C
5417   MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5418 
5419   Not Collective
5420 
5421   Input Parameters:
5422 + A     - the matrix
5423 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5424 . row   - index set of rows to extract (or `NULL`)
5425 - col   - index set of columns to extract (or `NULL`)
5426 
5427   Output Parameter:
5428 . A_loc - the local sequential matrix generated
5429 
5430   Level: developer
5431 
5432 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5433 @*/
5434 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
5435 {
5436   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5437   PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
5438   IS          isrowa, iscola;
5439   Mat        *aloc;
5440   PetscBool   match;
5441 
5442   PetscFunctionBegin;
5443   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
5444   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5445   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5446   if (!row) {
5447     start = A->rmap->rstart;
5448     end   = A->rmap->rend;
5449     PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
5450   } else {
5451     isrowa = *row;
5452   }
5453   if (!col) {
5454     start = A->cmap->rstart;
5455     cmap  = a->garray;
5456     nzA   = a->A->cmap->n;
5457     nzB   = a->B->cmap->n;
5458     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5459     ncols = 0;
5460     for (i = 0; i < nzB; i++) {
5461       if (cmap[i] < start) idx[ncols++] = cmap[i];
5462       else break;
5463     }
5464     imark = i;
5465     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
5466     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
5467     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
5468   } else {
5469     iscola = *col;
5470   }
5471   if (scall != MAT_INITIAL_MATRIX) {
5472     PetscCall(PetscMalloc1(1, &aloc));
5473     aloc[0] = *A_loc;
5474   }
5475   PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
5476   if (!col) { /* attach global id of condensed columns */
5477     PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
5478   }
5479   *A_loc = aloc[0];
5480   PetscCall(PetscFree(aloc));
5481   if (!row) PetscCall(ISDestroy(&isrowa));
5482   if (!col) PetscCall(ISDestroy(&iscola));
5483   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5484   PetscFunctionReturn(PETSC_SUCCESS);
5485 }
5486 
/*
 * Create a sequential AIJ matrix holding the rows of P listed in 'rows'; a whole row is
 * extracted once its index is matched. Rows may be owned locally or by remote ranks.
 * The routine is designed to be scalable in memory: nothing is allocated proportionally
 * to the global size.
 */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
{
  Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth;
  PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt            owner;
  PetscSFNode           *iremote, *oiremote;
  const PetscInt        *lrowindices;
  PetscSF                sf, osf;
  PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar     *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots (locally owned rows of P)
   * nrows is the number of leaves (requested rows)
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for each requested row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns each row has */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  /* per-row (diag,off-diag) counts and running offsets on the root side */
  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i = 0; i < plocalsize; i++) {
    /* diag */
    nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
    /* off diag */
    nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* compute offsets so that we have the relative location of each row */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i = 0; i < nrows; i++) {
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol    = PetscMax(pnnz[i], ncol);
    /* diag */
    dntotalcols += nlcols[i * 2 + 0];
    /* off diag */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure out the exact number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  /* build entry-level SF graphs: one leaf per nonzero, for the diag and off-diag parts */
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so ilocal needs to point into the first part of the memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off diag */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++]      = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* Off diag */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* We operate on the matrix internal data to save memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for the diag matrix; pd->j is mutated in place
     and restored below after the broadcast has consumed it */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth to store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use a memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  /* undo the in-place globalization of po->j done above */
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking the memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse them later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5660 
/*
 * Create a SeqAIJ matrix holding the rows of P indexed by the nonzero off-diagonal
 * columns of the local A (collapsed by 'dof').
 * This supports MPIAIJ and MAIJ.
 */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp;
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   *  and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to collect the unique keys (off-diag columns divided by dof) */
    PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      key = a->garray[i] / dof;
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same key as the previous one (garray sorted) */
        mapping[i] = count - 1;
      }
    }
    /* map: off-diag column of A -> row of P_oth */
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
    PetscCall(PetscHMapIGetSize(hamp, &htsize));
    PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
    /* extract and sort the unique keys: these are the (global) rows of P to fetch */
    PetscCall(PetscCalloc1(htsize, &rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    PetscCall(PetscSortInt(htsize, rowindices));
    PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
    /* In case the matrix was already created but the user wants to recreate it */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If the matrix was already created, we simply update values using the SF objects
     * that were attached to the matrix earlier.
     */
    const PetscScalar *pd_a, *po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
    PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
    /* Update values in place */
    PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5737 
5738 /*@C
  MatGetBrowsOfAcols - Returns an `IS` containing the rows of `B` that correspond to the nonzero columns of local `A`
5740 
5741   Collective
5742 
5743   Input Parameters:
5744 + A     - the first matrix in `MATMPIAIJ` format
5745 . B     - the second matrix in `MATMPIAIJ` format
5746 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5747 
5748   Output Parameters:
5749 + rowb  - On input index sets of rows of B to extract (or `NULL`), modified on output
5750 . colb  - On input index sets of columns of B to extract (or `NULL`), modified on output
5751 - B_seq - the sequential matrix generated
5752 
5753   Level: developer
5754 
5755 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
5756 @*/
PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
  IS          isrowb, iscolb;
  Mat        *bseq = NULL;

  PetscFunctionBegin;
  /* For A*B to make sense, A's local column ownership range must match B's local row ownership range */
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));

  if (scall == MAT_INITIAL_MATRIX) {
    /* Build the sorted list of global rows of B that are needed locally:
       off-diagonal columns of A below the local block (a->garray is sorted,
       so a single split point imark suffices), then the local rows, then the
       off-diagonal columns above the local block. */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) { /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
    /* All columns of B are extracted */
    PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
  } else {
    /* Reuse path: the caller must hand back the index sets and matrix produced by the initial call */
    PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb;
    iscolb = *colb;
    /* MatCreateSubMatrices expects an array of Mat for reuse; wrap the single matrix in a length-1 array */
    PetscCall(PetscMalloc1(1, &bseq));
    bseq[0] = *B_seq;
  }
  PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
  *B_seq = bseq[0];
  /* NOTE(review): for MAT_INITIAL_MATRIX, bseq was allocated inside MatCreateSubMatrices;
     freeing only the array (not the matrix) here appears intentional since *B_seq keeps the matrix — confirm */
  PetscCall(PetscFree(bseq));
  /* Hand the index sets back to the caller when requested; otherwise destroy them */
  if (!rowb) {
    PetscCall(ISDestroy(&isrowb));
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    PetscCall(ISDestroy(&iscolb));
  } else {
    *colb = iscolb;
  }
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5808 
5809 /*
5810     MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns
5811     of the OFF-DIAGONAL portion of local A
5812 
5813     Collective
5814 
5815    Input Parameters:
5816 +    A,B - the matrices in `MATMPIAIJ` format
5817 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5818 
   Output Parameters:
5820 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5821 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5822 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5823 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5824 
5825     Developer Note:
5826     This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable.
5828 
5829     Level: developer
5830 
5831 */
5832 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5833 {
5834   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5835   Mat_SeqAIJ        *b_oth;
5836   VecScatter         ctx;
5837   MPI_Comm           comm;
5838   const PetscMPIInt *rprocs, *sprocs;
5839   const PetscInt    *srow, *rstarts, *sstarts;
5840   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5841   PetscInt           i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len;
5842   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5843   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5844   PetscMPIInt        size, tag, rank, nreqs;
5845 
5846   PetscFunctionBegin;
5847   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5848   PetscCallMPI(MPI_Comm_size(comm, &size));
5849 
5850   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5851              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5852   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5853   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5854 
5855   if (size == 1) {
5856     startsj_s = NULL;
5857     bufa_ptr  = NULL;
5858     *B_oth    = NULL;
5859     PetscFunctionReturn(PETSC_SUCCESS);
5860   }
5861 
5862   ctx = a->Mvctx;
5863   tag = ((PetscObject)ctx)->tag;
5864 
5865   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5866   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5867   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5868   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5869   PetscCall(PetscMalloc1(nreqs, &reqs));
5870   rwaits = reqs;
5871   swaits = reqs + nrecvs;
5872 
5873   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5874   if (scall == MAT_INITIAL_MATRIX) {
5875     /* i-array */
5876     /*  post receives */
5877     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5878     for (i = 0; i < nrecvs; i++) {
5879       rowlen = rvalues + rstarts[i] * rbs;
5880       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5881       PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5882     }
5883 
5884     /* pack the outgoing message */
5885     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5886 
5887     sstartsj[0] = 0;
5888     rstartsj[0] = 0;
5889     len         = 0; /* total length of j or a array to be sent */
5890     if (nsends) {
5891       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5892       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5893     }
5894     for (i = 0; i < nsends; i++) {
5895       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5896       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5897       for (j = 0; j < nrows; j++) {
5898         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5899         for (l = 0; l < sbs; l++) {
5900           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5901 
5902           rowlen[j * sbs + l] = ncols;
5903 
5904           len += ncols;
5905           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5906         }
5907         k++;
5908       }
5909       PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5910 
5911       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5912     }
5913     /* recvs and sends of i-array are completed */
5914     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5915     PetscCall(PetscFree(svalues));
5916 
5917     /* allocate buffers for sending j and a arrays */
5918     PetscCall(PetscMalloc1(len + 1, &bufj));
5919     PetscCall(PetscMalloc1(len + 1, &bufa));
5920 
5921     /* create i-array of B_oth */
5922     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5923 
5924     b_othi[0] = 0;
5925     len       = 0; /* total length of j or a array to be received */
5926     k         = 0;
5927     for (i = 0; i < nrecvs; i++) {
5928       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5929       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5930       for (j = 0; j < nrows; j++) {
5931         b_othi[k + 1] = b_othi[k] + rowlen[j];
5932         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5933         k++;
5934       }
5935       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5936     }
5937     PetscCall(PetscFree(rvalues));
5938 
5939     /* allocate space for j and a arrays of B_oth */
5940     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5941     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5942 
5943     /* j-array */
5944     /*  post receives of j-array */
5945     for (i = 0; i < nrecvs; i++) {
5946       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5947       PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5948     }
5949 
5950     /* pack the outgoing message j-array */
5951     if (nsends) k = sstarts[0];
5952     for (i = 0; i < nsends; i++) {
5953       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5954       bufJ  = bufj + sstartsj[i];
5955       for (j = 0; j < nrows; j++) {
5956         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5957         for (ll = 0; ll < sbs; ll++) {
5958           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5959           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5960           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5961         }
5962       }
5963       PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5964     }
5965 
5966     /* recvs and sends of j-array are completed */
5967     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5968   } else if (scall == MAT_REUSE_MATRIX) {
5969     sstartsj = *startsj_s;
5970     rstartsj = *startsj_r;
5971     bufa     = *bufa_ptr;
5972     b_oth    = (Mat_SeqAIJ *)(*B_oth)->data;
5973     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5974   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5975 
5976   /* a-array */
5977   /*  post receives of a-array */
5978   for (i = 0; i < nrecvs; i++) {
5979     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5980     PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
5981   }
5982 
5983   /* pack the outgoing message a-array */
5984   if (nsends) k = sstarts[0];
5985   for (i = 0; i < nsends; i++) {
5986     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5987     bufA  = bufa + sstartsj[i];
5988     for (j = 0; j < nrows; j++) {
5989       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5990       for (ll = 0; ll < sbs; ll++) {
5991         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5992         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
5993         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5994       }
5995     }
5996     PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
5997   }
5998   /* recvs and sends of a-array are completed */
5999   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
6000   PetscCall(PetscFree(reqs));
6001 
6002   if (scall == MAT_INITIAL_MATRIX) {
6003     /* put together the new matrix */
6004     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
6005 
6006     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
6007     /* Since these are PETSc arrays, change flags to free them as necessary. */
6008     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
6009     b_oth->free_a  = PETSC_TRUE;
6010     b_oth->free_ij = PETSC_TRUE;
6011     b_oth->nonew   = 0;
6012 
6013     PetscCall(PetscFree(bufj));
6014     if (!startsj_s || !bufa_ptr) {
6015       PetscCall(PetscFree2(sstartsj, rstartsj));
6016       PetscCall(PetscFree(bufa_ptr));
6017     } else {
6018       *startsj_s = sstartsj;
6019       *startsj_r = rstartsj;
6020       *bufa_ptr  = bufa;
6021     }
6022   } else if (scall == MAT_REUSE_MATRIX) {
6023     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
6024   }
6025 
6026   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
6027   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
6028   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
6029   PetscFunctionReturn(PETSC_SUCCESS);
6030 }
6031 
6032 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
6033 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
6034 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
6035 #if defined(PETSC_HAVE_MKL_SPARSE)
6036 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
6037 #endif
6038 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
6039 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
6040 #if defined(PETSC_HAVE_ELEMENTAL)
6041 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
6042 #endif
6043 #if defined(PETSC_HAVE_SCALAPACK)
6044 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6045 #endif
6046 #if defined(PETSC_HAVE_HYPRE)
6047 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
6048 #endif
6049 #if defined(PETSC_HAVE_CUDA)
6050 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
6051 #endif
6052 #if defined(PETSC_HAVE_HIP)
6053 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
6054 #endif
6055 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6056 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6057 #endif
6058 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6059 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6060 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6061 
6062 /*
6063     Computes (B'*A')' since computing B*A directly is untenable
6064 
6065                n                       p                          p
6066         [             ]       [             ]         [                 ]
6067       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6068         [             ]       [             ]         [                 ]
6069 
6070 */
6071 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6072 {
6073   Mat At, Bt, Ct;
6074 
6075   PetscFunctionBegin;
6076   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6077   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6078   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
6079   PetscCall(MatDestroy(&At));
6080   PetscCall(MatDestroy(&Bt));
6081   PetscCall(MatTransposeSetPrecursor(Ct, C));
6082   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6083   PetscCall(MatDestroy(&Ct));
6084   PetscFunctionReturn(PETSC_SUCCESS);
6085 }
6086 
6087 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6088 {
6089   PetscBool cisdense;
6090 
6091   PetscFunctionBegin;
6092   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6093   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6094   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6095   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6096   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6097   PetscCall(MatSetUp(C));
6098 
6099   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6100   PetscFunctionReturn(PETSC_SUCCESS);
6101 }
6102 
6103 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6104 {
6105   Mat_Product *product = C->product;
6106   Mat          A = product->A, B = product->B;
6107 
6108   PetscFunctionBegin;
6109   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
6110              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6111   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6112   C->ops->productsymbolic = MatProductSymbolic_AB;
6113   PetscFunctionReturn(PETSC_SUCCESS);
6114 }
6115 
6116 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6117 {
6118   Mat_Product *product = C->product;
6119 
6120   PetscFunctionBegin;
6121   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6122   PetscFunctionReturn(PETSC_SUCCESS);
6123 }
6124 
6125 /*
6126    Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6127 
6128   Input Parameters:
6129 
6130     j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
6131     j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)
6132 
6133     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6134 
6135     For Set1, j1[] contains column indices of the nonzeros.
6136     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6138     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6139 
6140     Similar for Set2.
6141 
6142     This routine merges the two sets of nonzeros row by row and removes repeats.
6143 
6144   Output Parameters: (memory is allocated by the caller)
6145 
6146     i[],j[]: the CSR of the merged matrix, which has m rows.
6147     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6148     imap2[]: similar to imap1[], but for Set2.
6149     Note we order nonzeros row-by-row and from left to right.
6150 */
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m; /* Row index of mat */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Counts of unique nonzeros in Set1, Set2 and the merged set, respectively */
  i[0]        = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    /* Classic two-pointer merge over the sorted (possibly repeated) column indices of row r;
       each step advances past ALL repeats of the current unique entry via the jmap counts */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) { /* Entry only in Set1 */
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else { /* Entry only in Set2 */
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    i[r + 1] = t; /* CSR row pointer: row r of the merged matrix ends at t */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
6208 
6209 /*
6210   Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6211 
6212   Input Parameters:
6213     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6214     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6215       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6216 
6217       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6218       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6219 
6220   Output Parameters:
6221     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6222     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6223       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6224       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6225 
6226     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6227       Atot: number of entries belonging to the diagonal block.
6228       Annz: number of unique nonzeros belonging to the diagonal block.
6229       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6230         repeats (i.e., same 'i,j' pair).
6231       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6232         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6233 
6234       Atot: number of entries belonging to the diagonal block
6235       Annz: number of unique nonzeros belonging to the diagonal block.
6236 
6237     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6238 
6239     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6240 */
static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
{
  PetscInt    cstart, cend, rstart, rend, row, col;
  PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  k, m, p, q, r, s, mid;
  PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  m = rend - rstart; /* number of local rows */

  /* Skip negative rows (they mark entries to be ignored and sort to the front) */
  for (k = 0; k < n; k++)
    if (i[k] >= 0) break;

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k < n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s = k; s < n; s++)
      if (i[s] != row) break;

    /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */
    for (p = k; p < s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT;
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
    }
    /* Sort the row's columns (shifted diag entries come first), carrying perm[] along */
    PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
    PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row - rstart] = k;
    rowMid[row - rstart]   = mid;
    rowEnd[row - rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag row; `col` holds the still-shifted value so the
       inner comparison matches entries not yet reverted */
    for (p = k; p < mid;) {
      col = j[p];
      do {
        j[p] += PETSC_MAX_INT; /* Revert the modified diagonal indices */
        p++;
      } while (p < mid && j[p] == col);
      Annz++;
    }

    /* Count unique nonzeros of this offdiag row */
    for (p = mid; p < s;) {
      col = j[p];
      do {
        p++;
      } while (p < s && j[p] == col);
      Bnnz++;
    }
    k = s;
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot, &Aperm));
  PetscCall(PetscMalloc1(Btot, &Bperm));
  PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; /* reuse the counters as running offsets */
  for (r = 0; r < m; r++) {
    k   = rowBegin[r];
    mid = rowMid[r];
    s   = rowEnd[r];
    PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k));
    PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p = k; p < mid;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < mid && j[p] == col);
      Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); /* p-q = repeat count of this unique entry */
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < s && j[p] == col);
      Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(PETSC_SUCCESS);
}
6352 
6353 /*
6354   Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6355 
6356   Input Parameters:
6357     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6358     nnz:  number of unique nonzeros in the merged matrix
6359     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6360     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6361 
6362   Output Parameter: (memory is allocated by the caller)
6363     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6364 
6365   Example:
6366     nnz1 = 4
6367     nnz  = 6
6368     imap = [1,3,4,5]
6369     jmap = [0,3,5,6,7]
6370    then,
6371     jmap_new = [0,0,3,3,5,6,7]
6372 */
6373 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6374 {
6375   PetscCount k, p;
6376 
6377   PetscFunctionBegin;
6378   jmap_new[0] = 0;
6379   p           = nnz;                /* p loops over jmap_new[] backwards */
6380   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6381     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6382   }
6383   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6384   PetscFunctionReturn(PETSC_SUCCESS);
6385 }
6386 
/* Container destructor for the COO assembly scaffolding attached to an MPIAIJ matrix:
   releases the star forest used to communicate remote entries, all permutation and
   repeat-count maps for the diagonal (A*) and off-diagonal (B*) blocks, the send/recv
   buffers, and finally the struct itself. */
static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data)
{
  MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data;

  PetscFunctionBegin;
  PetscCall(PetscSFDestroy(&coo->sf));
  PetscCall(PetscFree(coo->Aperm1));
  PetscCall(PetscFree(coo->Bperm1));
  PetscCall(PetscFree(coo->Ajmap1));
  PetscCall(PetscFree(coo->Bjmap1));
  PetscCall(PetscFree(coo->Aimap2));
  PetscCall(PetscFree(coo->Bimap2));
  PetscCall(PetscFree(coo->Aperm2));
  PetscCall(PetscFree(coo->Bperm2));
  PetscCall(PetscFree(coo->Ajmap2));
  PetscCall(PetscFree(coo->Bjmap2));
  PetscCall(PetscFree(coo->Cperm1));
  /* sendbuf/recvbuf were allocated together with PetscMalloc2, so free them together */
  PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
  PetscCall(PetscFree(coo));
  PetscFunctionReturn(PETSC_SUCCESS);
}
6408 
6409 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6410 {
6411   MPI_Comm             comm;
6412   PetscMPIInt          rank, size;
6413   PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6414   PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
6415   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6416   PetscContainer       container;
6417   MatCOOStruct_MPIAIJ *coo;
6418 
6419   PetscFunctionBegin;
6420   PetscCall(PetscFree(mpiaij->garray));
6421   PetscCall(VecDestroy(&mpiaij->lvec));
6422 #if defined(PETSC_USE_CTABLE)
6423   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6424 #else
6425   PetscCall(PetscFree(mpiaij->colmap));
6426 #endif
6427   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6428   mat->assembled     = PETSC_FALSE;
6429   mat->was_assembled = PETSC_FALSE;
6430 
6431   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6432   PetscCallMPI(MPI_Comm_size(comm, &size));
6433   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6434   PetscCall(PetscLayoutSetUp(mat->rmap));
6435   PetscCall(PetscLayoutSetUp(mat->cmap));
6436   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6437   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6438   PetscCall(MatGetLocalSize(mat, &m, &n));
6439   PetscCall(MatGetSize(mat, &M, &N));
6440 
6441   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6442   /* entries come first, then local rows, then remote rows.                     */
6443   PetscCount n1 = coo_n, *perm1;
6444   PetscInt  *i1 = coo_i, *j1 = coo_j;
6445 
6446   PetscCall(PetscMalloc1(n1, &perm1));
6447   for (k = 0; k < n1; k++) perm1[k] = k;
6448 
6449   /* Manipulate indices so that entries with negative row or col indices will have smallest
6450      row indices, local entries will have greater but negative row indices, and remote entries
6451      will have positive row indices.
6452   */
6453   for (k = 0; k < n1; k++) {
6454     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT;                /* e.g., -2^31, minimal to move them ahead */
6455     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6456     else {
6457       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6458       if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6459     }
6460   }
6461 
6462   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6463   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6464 
6465   /* Advance k to the first entry we need to take care of */
6466   for (k = 0; k < n1; k++)
6467     if (i1[k] > PETSC_MIN_INT) break;
6468   PetscInt i1start = k;
6469 
6470   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */
6471   for (; k < rem; k++) i1[k] += PETSC_MAX_INT;                                    /* Revert row indices of local rows*/
6472 
6473   /*           Send remote rows to their owner                                  */
6474   /* Find which rows should be sent to which remote ranks*/
6475   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6476   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6477   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6478   const PetscInt *ranges;
6479   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6480 
6481   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6482   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6483   for (k = rem; k < n1;) {
6484     PetscMPIInt owner;
6485     PetscInt    firstRow, lastRow;
6486 
6487     /* Locate a row range */
6488     firstRow = i1[k]; /* first row of this owner */
6489     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6490     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6491 
6492     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6493     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6494 
6495     /* All entries in [k,p) belong to this remote owner */
6496     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6497       PetscMPIInt *sendto2;
6498       PetscInt    *nentries2;
6499       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6500 
6501       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6502       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6503       PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1));
6504       PetscCall(PetscFree2(sendto, nentries2));
6505       sendto   = sendto2;
6506       nentries = nentries2;
6507       maxNsend = maxNsend2;
6508     }
6509     sendto[nsend]   = owner;
6510     nentries[nsend] = p - k;
6511     PetscCall(PetscCountCast(p - k, &nentries[nsend]));
6512     nsend++;
6513     k = p;
6514   }
6515 
6516   /* Build 1st SF to know offsets on remote to send data */
6517   PetscSF      sf1;
6518   PetscInt     nroots = 1, nroots2 = 0;
6519   PetscInt     nleaves = nsend, nleaves2 = 0;
6520   PetscInt    *offsets;
6521   PetscSFNode *iremote;
6522 
6523   PetscCall(PetscSFCreate(comm, &sf1));
6524   PetscCall(PetscMalloc1(nsend, &iremote));
6525   PetscCall(PetscMalloc1(nsend, &offsets));
6526   for (k = 0; k < nsend; k++) {
6527     iremote[k].rank  = sendto[k];
6528     iremote[k].index = 0;
6529     nleaves2 += nentries[k];
6530     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6531   }
6532   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6533   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6534   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6535   PetscCall(PetscSFDestroy(&sf1));
6536   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem);
6537 
6538   /* Build 2nd SF to send remote COOs to their owner */
6539   PetscSF sf2;
6540   nroots  = nroots2;
6541   nleaves = nleaves2;
6542   PetscCall(PetscSFCreate(comm, &sf2));
6543   PetscCall(PetscSFSetFromOptions(sf2));
6544   PetscCall(PetscMalloc1(nleaves, &iremote));
6545   p = 0;
6546   for (k = 0; k < nsend; k++) {
6547     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6548     for (q = 0; q < nentries[k]; q++, p++) {
6549       iremote[p].rank  = sendto[k];
6550       iremote[p].index = offsets[k] + q;
6551     }
6552   }
6553   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6554 
6555   /* Send the remote COOs to their owner */
6556   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6557   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6558   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6559   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6560   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE));
6561   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6562   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE));
6563 
6564   PetscCall(PetscFree(offsets));
6565   PetscCall(PetscFree2(sendto, nentries));
6566 
6567   /* Sort received COOs by row along with the permutation array     */
6568   for (k = 0; k < n2; k++) perm2[k] = k;
6569   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6570 
6571   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6572   PetscCount *Cperm1;
6573   PetscCall(PetscMalloc1(nleaves, &Cperm1));
6574   PetscCall(PetscArraycpy(Cperm1, perm1 + rem, nleaves));
6575 
6576   /* Support for HYPRE matrices, kind of a hack.
6577      Swap min column with diagonal so that diagonal values will go first */
6578   PetscBool   hypre;
6579   const char *name;
6580   PetscCall(PetscObjectGetName((PetscObject)mat, &name));
6581   PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", name, &hypre));
6582   if (hypre) {
6583     PetscInt *minj;
6584     PetscBT   hasdiag;
6585 
6586     PetscCall(PetscBTCreate(m, &hasdiag));
6587     PetscCall(PetscMalloc1(m, &minj));
6588     for (k = 0; k < m; k++) minj[k] = PETSC_MAX_INT;
6589     for (k = i1start; k < rem; k++) {
6590       if (j1[k] < cstart || j1[k] >= cend) continue;
6591       const PetscInt rindex = i1[k] - rstart;
6592       if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6593       minj[rindex] = PetscMin(minj[rindex], j1[k]);
6594     }
6595     for (k = 0; k < n2; k++) {
6596       if (j2[k] < cstart || j2[k] >= cend) continue;
6597       const PetscInt rindex = i2[k] - rstart;
6598       if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6599       minj[rindex] = PetscMin(minj[rindex], j2[k]);
6600     }
6601     for (k = i1start; k < rem; k++) {
6602       const PetscInt rindex = i1[k] - rstart;
6603       if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6604       if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart);
6605       else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex];
6606     }
6607     for (k = 0; k < n2; k++) {
6608       const PetscInt rindex = i2[k] - rstart;
6609       if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6610       if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart);
6611       else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex];
6612     }
6613     PetscCall(PetscBTDestroy(&hasdiag));
6614     PetscCall(PetscFree(minj));
6615   }
6616 
6617   /* Split local COOs and received COOs into diag/offdiag portions */
6618   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6619   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1;
6620   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6621   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6622   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6623   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6624 
6625   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6626   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6627   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6628   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6629 
6630   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6631   PetscInt *Ai, *Bi;
6632   PetscInt *Aj, *Bj;
6633 
6634   PetscCall(PetscMalloc1(m + 1, &Ai));
6635   PetscCall(PetscMalloc1(m + 1, &Bi));
6636   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6637   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6638 
6639   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6640   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6641   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6642   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6643   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6644 
6645   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6646   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6647 
6648   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6649   /* expect nonzeros in A/B most likely have local contributing entries        */
6650   PetscInt    Annz = Ai[m];
6651   PetscInt    Bnnz = Bi[m];
6652   PetscCount *Ajmap1_new, *Bjmap1_new;
6653 
6654   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6655   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6656 
6657   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6658   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6659 
6660   PetscCall(PetscFree(Aimap1));
6661   PetscCall(PetscFree(Ajmap1));
6662   PetscCall(PetscFree(Bimap1));
6663   PetscCall(PetscFree(Bjmap1));
6664   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6665   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6666   PetscCall(PetscFree(perm1));
6667   PetscCall(PetscFree3(i2, j2, perm2));
6668 
6669   Ajmap1 = Ajmap1_new;
6670   Bjmap1 = Bjmap1_new;
6671 
6672   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6673   if (Annz < Annz1 + Annz2) {
6674     PetscInt *Aj_new;
6675     PetscCall(PetscMalloc1(Annz, &Aj_new));
6676     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6677     PetscCall(PetscFree(Aj));
6678     Aj = Aj_new;
6679   }
6680 
6681   if (Bnnz < Bnnz1 + Bnnz2) {
6682     PetscInt *Bj_new;
6683     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6684     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6685     PetscCall(PetscFree(Bj));
6686     Bj = Bj_new;
6687   }
6688 
6689   /* Create new submatrices for on-process and off-process coupling                  */
6690   PetscScalar     *Aa, *Ba;
6691   MatType          rtype;
6692   Mat_SeqAIJ      *a, *b;
6693   PetscObjectState state;
6694   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6695   PetscCall(PetscCalloc1(Bnnz, &Ba));
6696   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6697   if (cstart) {
6698     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6699   }
6700   PetscCall(MatDestroy(&mpiaij->A));
6701   PetscCall(MatDestroy(&mpiaij->B));
6702   PetscCall(MatGetRootType_Private(mat, &rtype));
6703   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6704   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6705   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6706   mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ
6707   state              = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate;
6708   PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
6709 
6710   a               = (Mat_SeqAIJ *)mpiaij->A->data;
6711   b               = (Mat_SeqAIJ *)mpiaij->B->data;
6712   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6713   a->free_a = b->free_a = PETSC_TRUE;
6714   a->free_ij = b->free_ij = PETSC_TRUE;
6715 
6716   /* conversion must happen AFTER multiply setup */
6717   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6718   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6719   PetscCall(VecDestroy(&mpiaij->lvec));
6720   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6721 
6722   // Put the COO struct in a container and then attach that to the matrix
6723   PetscCall(PetscMalloc1(1, &coo));
6724   coo->n       = coo_n;
6725   coo->sf      = sf2;
6726   coo->sendlen = nleaves;
6727   coo->recvlen = nroots;
6728   coo->Annz    = Annz;
6729   coo->Bnnz    = Bnnz;
6730   coo->Annz2   = Annz2;
6731   coo->Bnnz2   = Bnnz2;
6732   coo->Atot1   = Atot1;
6733   coo->Atot2   = Atot2;
6734   coo->Btot1   = Btot1;
6735   coo->Btot2   = Btot2;
6736   coo->Ajmap1  = Ajmap1;
6737   coo->Aperm1  = Aperm1;
6738   coo->Bjmap1  = Bjmap1;
6739   coo->Bperm1  = Bperm1;
6740   coo->Aimap2  = Aimap2;
6741   coo->Ajmap2  = Ajmap2;
6742   coo->Aperm2  = Aperm2;
6743   coo->Bimap2  = Bimap2;
6744   coo->Bjmap2  = Bjmap2;
6745   coo->Bperm2  = Bperm2;
6746   coo->Cperm1  = Cperm1;
6747   // Allocate in preallocation. If not used, it has zero cost on host
6748   PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
6749   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
6750   PetscCall(PetscContainerSetPointer(container, coo));
6751   PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ));
6752   PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
6753   PetscCall(PetscContainerDestroy(&container));
6754   PetscFunctionReturn(PETSC_SUCCESS);
6755 }
6756 
6757 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6758 {
6759   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6760   Mat                  A = mpiaij->A, B = mpiaij->B;
6761   PetscScalar         *Aa, *Ba;
6762   PetscScalar         *sendbuf, *recvbuf;
6763   const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
6764   const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
6765   const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
6766   const PetscCount    *Cperm1;
6767   PetscContainer       container;
6768   MatCOOStruct_MPIAIJ *coo;
6769 
6770   PetscFunctionBegin;
6771   PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
6772   PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix");
6773   PetscCall(PetscContainerGetPointer(container, (void **)&coo));
6774   sendbuf = coo->sendbuf;
6775   recvbuf = coo->recvbuf;
6776   Ajmap1  = coo->Ajmap1;
6777   Ajmap2  = coo->Ajmap2;
6778   Aimap2  = coo->Aimap2;
6779   Bjmap1  = coo->Bjmap1;
6780   Bjmap2  = coo->Bjmap2;
6781   Bimap2  = coo->Bimap2;
6782   Aperm1  = coo->Aperm1;
6783   Aperm2  = coo->Aperm2;
6784   Bperm1  = coo->Bperm1;
6785   Bperm2  = coo->Bperm2;
6786   Cperm1  = coo->Cperm1;
6787 
6788   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6789   PetscCall(MatSeqAIJGetArray(B, &Ba));
6790 
6791   /* Pack entries to be sent to remote */
6792   for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6793 
6794   /* Send remote entries to their owner and overlap the communication with local computation */
6795   PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6796   /* Add local entries to A and B */
6797   for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6798     PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
6799     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6800     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6801   }
6802   for (PetscCount i = 0; i < coo->Bnnz; i++) {
6803     PetscScalar sum = 0.0;
6804     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6805     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6806   }
6807   PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6808 
6809   /* Add received remote entries to A and B */
6810   for (PetscCount i = 0; i < coo->Annz2; i++) {
6811     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6812   }
6813   for (PetscCount i = 0; i < coo->Bnnz2; i++) {
6814     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6815   }
6816   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6817   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6818   PetscFunctionReturn(PETSC_SUCCESS);
6819 }
6820 
6821 /*MC
6822    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6823 
6824    Options Database Keys:
6825 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6826 
6827    Level: beginner
6828 
6829    Notes:
6830    `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values,
6831     in this case the values associated with the rows and columns one passes in are set to zero
6832     in the matrix
6833 
    `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
6836 
6837 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6838 M*/
/* Type constructor for MATMPIAIJ: allocates the Mat_MPIAIJ data, installs the operation
   table, creates the stash used for off-process MatSetValues(), and registers the
   composed-function entry points (preallocation, COO assembly, and type conversions). */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  /* Allocate the implementation data and install the MPIAIJ function table */
  PetscCall(PetscNew(&b));
  B->data       = (void *)b;
  B->ops[0]     = MatOps_Values;
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL; /* global-to-local column map, built lazily during assembly */
  b->garray      = NULL; /* global indices of the off-diagonal columns, built during assembly */
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* Register composed-function entry points dispatched by name elsewhere in PETSc */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  /* Conversions to other formats; GPU/third-party backends are registered only when configured */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  /* COO assembly interface (see MatSetPreallocationCOO_MPIAIJ/MatSetValuesCOO_MPIAIJ above) */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(PETSC_SUCCESS);
}
6921 
6922 /*@C
6923   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6924   and "off-diagonal" part of the matrix in CSR format.
6925 
6926   Collective
6927 
6928   Input Parameters:
6929 + comm - MPI communicator
6930 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
6931 . n    - This value should be the same as the local size used in creating the
6932        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
6933        calculated if `N` is given) For square matrices `n` is almost always `m`.
6934 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
6935 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
6936 . i    - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6937 . j    - column indices, which must be local, i.e., based off the start column of the diagonal portion
6938 . a    - matrix values
6939 . oi   - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6940 . oj   - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
6941 - oa   - matrix values
6942 
6943   Output Parameter:
6944 . mat - the matrix
6945 
6946   Level: advanced
6947 
6948   Notes:
6949   The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6950   must free the arrays once the matrix has been destroyed and not before.
6951 
6952   The `i` and `j` indices are 0 based
6953 
6954   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix
6955 
6956   This sets local rows and cannot be used to set off-processor values.
6957 
6958   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6959   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6960   not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6961   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6962   keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6963   communication if it is known that only local entries will be set.
6964 
6965 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6966           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6967 @*/
6968 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
6969 {
6970   Mat_MPIAIJ *maij;
6971 
6972   PetscFunctionBegin;
6973   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
6974   PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
6975   PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
6976   PetscCall(MatCreate(comm, mat));
6977   PetscCall(MatSetSizes(*mat, m, n, M, N));
6978   PetscCall(MatSetType(*mat, MATMPIAIJ));
6979   maij = (Mat_MPIAIJ *)(*mat)->data;
6980 
6981   (*mat)->preallocated = PETSC_TRUE;
6982 
6983   PetscCall(PetscLayoutSetUp((*mat)->rmap));
6984   PetscCall(PetscLayoutSetUp((*mat)->cmap));
6985 
6986   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
6987   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));
6988 
6989   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
6990   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
6991   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
6992   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
6993   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
6994   PetscFunctionReturn(PETSC_SUCCESS);
6995 }
6996 
/* Backend state for MPIAIJ matrix-matrix products; stored as product data and
   freed by MatDestroy_MatMatMPIAIJBACKEND(). */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt   cp;    /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r; /* send/receive offsets used by MatGetBrowsOfAoCols_MPIAIJ */
  PetscScalar *bufa;                  /* communication buffer for the same routine */
  Mat          P_oth;                 /* rows of P needed from other ranks */

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;            /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;         /* memory type of the SF-managed coo_v/coo_w buffers */

  /* customization */
  PetscBool abmerge;    /* merge diag and off-diag of A (or B) before the product */
  PetscBool P_oth_bind; /* bind P_oth to CPU memory */
} MatMatMPIAIJBACKEND;
7027 
7028 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
7029 {
7030   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
7031   PetscInt             i;
7032 
7033   PetscFunctionBegin;
7034   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
7035   PetscCall(PetscFree(mmdata->bufa));
7036   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
7037   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
7038   PetscCall(MatDestroy(&mmdata->P_oth));
7039   PetscCall(MatDestroy(&mmdata->Bloc));
7040   PetscCall(PetscSFDestroy(&mmdata->sf));
7041   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
7042   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
7043   PetscCall(PetscFree(mmdata->own[0]));
7044   PetscCall(PetscFree(mmdata->own));
7045   PetscCall(PetscFree(mmdata->off[0]));
7046   PetscCall(PetscFree(mmdata->off));
7047   PetscCall(PetscFree(mmdata));
7048   PetscFunctionReturn(PETSC_SUCCESS);
7049 }
7050 
7051 /* Copy selected n entries with indices in idx[] of A to v[].
7052    If idx is NULL, copy the whole data array of A to v[]
7053  */
7054 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
7055 {
7056   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
7057 
7058   PetscFunctionBegin;
7059   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7060   if (f) {
7061     PetscCall((*f)(A, n, idx, v));
7062   } else {
7063     const PetscScalar *vv;
7064 
7065     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7066     if (n && idx) {
7067       PetscScalar    *w  = v;
7068       const PetscInt *oi = idx;
7069       PetscInt        j;
7070 
7071       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7072     } else {
7073       PetscCall(PetscArraycpy(v, vv, n));
7074     }
7075     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7076   }
7077   PetscFunctionReturn(PETSC_SUCCESS);
7078 }
7079 
7080 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
7081 {
7082   MatMatMPIAIJBACKEND *mmdata;
7083   PetscInt             i, n_d, n_o;
7084 
7085   PetscFunctionBegin;
7086   MatCheckProduct(C, 1);
7087   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
7088   mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
7089   if (!mmdata->reusesym) { /* update temporary matrices */
7090     if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7091     if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
7092   }
7093   mmdata->reusesym = PETSC_FALSE;
7094 
7095   for (i = 0; i < mmdata->cp; i++) {
7096     PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
7097     PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
7098   }
7099   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
7100     PetscInt noff = mmdata->off[i + 1] - mmdata->off[i];
7101 
7102     if (mmdata->mptmp[i]) continue;
7103     if (noff) {
7104       PetscInt nown = mmdata->own[i + 1] - mmdata->own[i];
7105 
7106       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
7107       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
7108       n_o += noff;
7109       n_d += nown;
7110     } else {
7111       Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;
7112 
7113       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
7114       n_d += mm->nz;
7115     }
7116   }
7117   if (mmdata->hasoffproc) { /* offprocess insertion */
7118     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7119     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7120   }
7121   PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
7122   PetscFunctionReturn(PETSC_SUCCESS);
7123 }
7124 
/* Support for Pt * A, A * P, or Pt * A * P

   Symbolic phase: decompose the parallel product into at most
   MAX_NUMBER_INTERMEDIATE sequential intermediate products mp[], record how
   each intermediate's local row/column indices map to global indices of C
   (rmapt/cmapt/rmapa/cmapa), and preallocate C with the resulting COO pattern.
   The numeric phase then only recomputes mp[] and scatters values. */
#define MAX_NUMBER_INTERMEDIATE 4
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product           *product = C->product;
  Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ            *a, *p;
  MatMatMPIAIJBACKEND   *mmdata;
  ISLocalToGlobalMapping P_oth_l2g = NULL;
  IS                     glob      = NULL;
  const char            *prefix;
  char                   pprefix[256];
  const PetscInt        *globidx, *P_oth_idx;
  PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
  PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
  PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
                                                                                         /* a base offset; type-2: sparse with a local to global map table */
  const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */

  MatProductType ptype;
  PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
  PetscMPIInt    size;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  ptype = product->type;
  /* treat A^t*B as A*B when A is symmetric */
  if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
    ptype                                          = MATPRODUCT_AB;
    product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
  }
  /* determine operands and the local/global sizes of C */
  switch (ptype) {
  case MATPRODUCT_AB:
    A          = product->A;
    P          = product->B;
    m          = A->rmap->n;
    n          = P->cmap->n;
    M          = A->rmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
    break;
  case MATPRODUCT_AtB:
    P          = product->A;
    A          = product->B;
    m          = P->cmap->n;
    n          = A->cmap->n;
    M          = P->cmap->N;
    N          = A->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  case MATPRODUCT_PtAP:
    A          = product->A;
    P          = product->B;
    m          = P->cmap->n;
    n          = P->cmap->n;
    M          = P->cmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
  if (size == 1) hasoffproc = PETSC_FALSE; /* uniprocessor: no values can belong to another process */

  /* defaults */
  for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
    mp[i]    = NULL;
    mptmp[i] = PETSC_FALSE;
    rmapt[i] = -1;
    cmapt[i] = -1;
    rmapa[i] = NULL;
    cmapa[i] = NULL;
  }

  /* customization */
  PetscCall(PetscNew(&mmdata));
  mmdata->reusesym = product->api_user;
  if (ptype == MATPRODUCT_AB) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
      PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  } else if (ptype == MATPRODUCT_PtAP) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
      PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  }
  a = (Mat_MPIAIJ *)A->data;
  p = (Mat_MPIAIJ *)P->data;
  PetscCall(MatSetSizes(C, m, n, M, N));
  PetscCall(PetscLayoutSetUp(C->rmap));
  PetscCall(PetscLayoutSetUp(C->cmap));
  PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatGetOptionsPrefix(C, &prefix));

  /* create the intermediate products and record their row/col global maps */
  cp = 0;
  switch (ptype) {
  case MATPRODUCT_AB: /* A * P */
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));

    /* A_diag * P_local (merged or not) */
    if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
      /* P is product->B */
      PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
      PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else { /* A_diag * P_diag and A_diag * P_off */
      PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 1;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = p->garray;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }

    /* A_off * P_other */
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;

  case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
    /* A is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
      PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else {
      PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = p->garray;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  case MATPRODUCT_PtAP:
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    /* P is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
    PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
    PetscCall(MatProductSetFill(mp[cp], product->fill));
    PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
    PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
    PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
    mp[cp]->product->api_user = product->api_user;
    PetscCall(MatProductSetFromOptions(mp[cp]));
    PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
    PetscCall(ISGetIndices(glob, &globidx));
    rmapt[cp] = 2;
    rmapa[cp] = globidx;
    cmapt[cp] = 2;
    cmapa[cp] = globidx;
    mptmp[cp] = PETSC_FALSE;
    cp++;
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      /* A_off * P_other, a temporary product consumed by the next one */
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      mptmp[cp] = PETSC_TRUE;
      cp++;
      /* Bloc^t * (A_off * P_other) */
      PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  /* sanity check */
  if (size > 1)
    for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);

  PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
  for (i = 0; i < cp; i++) {
    mmdata->mp[i]    = mp[i];
    mmdata->mptmp[i] = mptmp[i];
  }
  mmdata->cp             = cp;
  C->product->data       = mmdata;
  C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
  C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;

  /* memory type */
  mmdata->mtype = PETSC_MEMTYPE_HOST;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
  if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
  else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
  else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;

  /* prepare coo coordinates for values insertion */

  /* count total nonzeros of those intermediate seqaij Mats
    ncoo_d:    # of nonzeros of matrices that do not have offproc entries
    ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
    ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
  */
  for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
    if (mptmp[cp]) continue;
    if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
      const PetscInt *rmap = rmapa[cp];
      const PetscInt  mr   = mp[cp]->rmap->n;
      const PetscInt  rs   = C->rmap->rstart;
      const PetscInt  re   = C->rmap->rend;
      const PetscInt *ii   = mm->i;
      for (i = 0; i < mr; i++) {
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i + 1] - ii[i];
        if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
        else ncoo_oown += nz;                  /* this row is local */
      }
    } else ncoo_d += mm->nz;
  }

  /*
    ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc

    ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.

    off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].

    off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
    own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
    so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.

    coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
    Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
  */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));

  /* gather (i,j) of nonzeros inserted by remote procs */
  if (hasoffproc) {
    PetscSF  msf;
    PetscInt ncoo2, *coo_i2, *coo_j2;

    PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
    PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
    PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */

    for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
      Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
      PetscInt   *idxoff = mmdata->off[cp];
      PetscInt   *idxown = mmdata->own[cp];
      if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
        const PetscInt *rmap = rmapa[cp];
        const PetscInt *cmap = cmapa[cp];
        const PetscInt *ii   = mm->i;
        PetscInt       *coi  = coo_i + ncoo_o;
        PetscInt       *coj  = coo_j + ncoo_o;
        const PetscInt  mr   = mp[cp]->rmap->n;
        const PetscInt  rs   = C->rmap->rstart;
        const PetscInt  re   = C->rmap->rend;
        const PetscInt  cs   = C->cmap->rstart;
        for (i = 0; i < mr; i++) {
          const PetscInt *jj = mm->j + ii[i];
          const PetscInt  gr = rmap[i];
          const PetscInt  nz = ii[i + 1] - ii[i];
          if (gr < rs || gr >= re) { /* this is an offproc row */
            for (j = ii[i]; j < ii[i + 1]; j++) {
              *coi++    = gr;
              *idxoff++ = j;
            }
            if (!cmapt[cp]) { /* already global */
              for (j = 0; j < nz; j++) *coj++ = jj[j];
            } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
              for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
            } else { /* offdiag */
              for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
            }
            ncoo_o += nz;
          } else { /* this is a local row */
            for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
          }
        }
      }
      mmdata->off[cp + 1] = idxoff;
      mmdata->own[cp + 1] = idxown;
    }

    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
    PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
    PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
    ncoo = ncoo_d + ncoo_oown + ncoo2;
    PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscFree2(coo_i, coo_j));
    /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
    PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
    coo_i = coo_i2;
    coo_j = coo_j2;
  } else { /* no offproc values insertion */
    ncoo = ncoo_d;
    PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));

    /* an empty sf is still needed so PetscSFMalloc/PetscSFFree work uniformly */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(mmdata->sf));
  }
  mmdata->hasoffproc = hasoffproc;

  /* gather (i,j) of nonzeros inserted locally */
  for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
    PetscInt       *coi  = coo_i + ncoo_d;
    PetscInt       *coj  = coo_j + ncoo_d;
    const PetscInt *jj   = mm->j;
    const PetscInt *ii   = mm->i;
    const PetscInt *cmap = cmapa[cp];
    const PetscInt *rmap = rmapa[cp];
    const PetscInt  mr   = mp[cp]->rmap->n;
    const PetscInt  rs   = C->rmap->rstart;
    const PetscInt  re   = C->rmap->rend;
    const PetscInt  cs   = C->cmap->rstart;

    if (mptmp[cp]) continue;
    if (rmapt[cp] == 1) { /* consecutive rows */
      /* fill coo_i */
      for (i = 0; i < mr; i++) {
        const PetscInt gr = i + rs;
        for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
      }
      /* fill coo_j */
      if (!cmapt[cp]) { /* type-0, already global */
        PetscCall(PetscArraycpy(coj, jj, mm->nz));
      } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
        for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
      } else {                                            /* type-2, local to global for sparse columns */
        for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
      }
      ncoo_d += mm->nz;
    } else if (rmapt[cp] == 2) { /* sparse rows */
      for (i = 0; i < mr; i++) {
        const PetscInt *jj = mm->j + ii[i];
        const PetscInt  gr = rmap[i];
        const PetscInt  nz = ii[i + 1] - ii[i];
        if (gr >= rs && gr < re) { /* local rows */
          for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
          if (!cmapt[cp]) { /* type-0, already global */
            for (j = 0; j < nz; j++) *coj++ = jj[j];
          } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
            for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
          } else { /* type-2, local to global for sparse columns */
            for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
          }
          ncoo_d += nz;
        }
      }
    }
  }
  if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
  PetscCall(ISDestroy(&glob));
  if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
  PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
  /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
  PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));

  /* preallocate with COO data */
  PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
  PetscCall(PetscFree2(coo_i, coo_j));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7621 
/* Select MatProductSymbolic_MPIAIJBACKEND for AB, AtB and PtAP products when
   possible; otherwise fall back to the generic MPIAIJ implementation.
   In device builds the backend is used only when A and B have the same type,
   neither is bound to the CPU, and the user has not requested CPU code via
   the *_backend_cpu options */
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match  = PETSC_FALSE;
  PetscBool usecpu = PETSC_FALSE;
#else
  PetscBool match = PETSC_TRUE; /* no device support: the backend path is always eligible */
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat, 1);
#if defined(PETSC_HAVE_DEVICE)
  /* operands must share the same type and live on the device */
  if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
  if (match) { /* we can always fallback to the CPU if requested */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu;
  }
#endif
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7692 
7693 /*
7694    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7695 
7696    n - the number of block indices in cc[]
7697    cc - the block indices (must be large enough to contain the indices)
7698 */
7699 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7700 {
7701   PetscInt        cnt = -1, nidx, j;
7702   const PetscInt *idx;
7703 
7704   PetscFunctionBegin;
7705   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7706   if (nidx) {
7707     cnt     = 0;
7708     cc[cnt] = idx[0] / bs;
7709     for (j = 1; j < nidx; j++) {
7710       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7711     }
7712   }
7713   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7714   *n = cnt + 1;
7715   PetscFunctionReturn(PETSC_SUCCESS);
7716 }
7717 
7718 /*
7719     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7720 
7721     ncollapsed - the number of block indices
7722     collapsed - the block indices (must be large enough to contain the indices)
7723 */
7724 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7725 {
7726   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7727 
7728   PetscFunctionBegin;
7729   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7730   for (i = start + 1; i < start + bs; i++) {
7731     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7732     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7733     cprevtmp = cprev;
7734     cprev    = merged;
7735     merged   = cprevtmp;
7736   }
7737   *ncollapsed = nprev;
7738   if (collapsed) *collapsed = cprev;
7739   PetscFunctionReturn(PETSC_SUCCESS);
7740 }
7741 
7742 /*
7743  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7744 
7745  Input Parameter:
7746  . Amat - matrix
7747  - symmetrize - make the result symmetric
7748  + scale - scale with diagonal
7749 
7750  Output Parameter:
7751  . a_Gmat - output scalar graph >= 0
7752 
7753 */
7754 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, Mat *a_Gmat)
7755 {
7756   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7757   MPI_Comm  comm;
7758   Mat       Gmat;
7759   PetscBool ismpiaij, isseqaij;
7760   Mat       a, b, c;
7761   MatType   jtype;
7762 
7763   PetscFunctionBegin;
7764   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7765   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7766   PetscCall(MatGetSize(Amat, &MM, &NN));
7767   PetscCall(MatGetBlockSize(Amat, &bs));
7768   nloc = (Iend - Istart) / bs;
7769 
7770   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7771   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7772   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7773 
7774   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7775   /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
7776      implementation */
7777   if (bs > 1) {
7778     PetscCall(MatGetType(Amat, &jtype));
7779     PetscCall(MatCreate(comm, &Gmat));
7780     PetscCall(MatSetType(Gmat, jtype));
7781     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7782     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7783     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7784       PetscInt  *d_nnz, *o_nnz;
7785       MatScalar *aa, val, *AA;
7786       PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
7787       if (isseqaij) {
7788         a = Amat;
7789         b = NULL;
7790       } else {
7791         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7792         a             = d->A;
7793         b             = d->B;
7794       }
7795       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7796       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7797       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7798         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
7799         const PetscInt *cols1, *cols2;
7800         for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
7801           PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
7802           nnz[brow / bs] = nc2 / bs;
7803           if (nc2 % bs) ok = 0;
7804           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
7805           for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
7806             PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
7807             if (nc1 != nc2) ok = 0;
7808             else {
7809               for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
7810                 if (cols1[jj] != cols2[jj]) ok = 0;
7811                 if (cols1[jj] % bs != jj % bs) ok = 0;
7812               }
7813             }
7814             PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
7815           }
7816           PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
7817           if (!ok) {
7818             PetscCall(PetscFree2(d_nnz, o_nnz));
7819             PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
7820             goto old_bs;
7821           }
7822         }
7823       }
7824       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7825       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7826       PetscCall(PetscFree2(d_nnz, o_nnz));
7827       PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
7828       // diag
7829       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
7830         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
7831         ai               = aseq->i;
7832         n                = ai[brow + 1] - ai[brow];
7833         aj               = aseq->j + ai[brow];
7834         for (int k = 0; k < n; k += bs) {        // block columns
7835           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
7836           val        = 0;
7837           for (int ii = 0; ii < bs; ii++) { // rows in block
7838             aa = aseq->a + ai[brow + ii] + k;
7839             for (int jj = 0; jj < bs; jj++) {         // columns in block
7840               val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7841             }
7842           }
7843           PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7844           AA[k / bs] = val;
7845         }
7846         grow = Istart / bs + brow / bs;
7847         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES));
7848       }
7849       // off-diag
7850       if (ismpiaij) {
7851         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
7852         const PetscScalar *vals;
7853         const PetscInt    *cols, *garray = aij->garray;
7854         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
7855         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
7856           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
7857           for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
7858             PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
7859             AA[k / bs] = 0;
7860             AJ[cidx]   = garray[cols[k]] / bs;
7861           }
7862           nc = ncols / bs;
7863           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
7864           for (int ii = 0; ii < bs; ii++) { // rows in block
7865             PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7866             for (int k = 0; k < ncols; k += bs) {
7867               for (int jj = 0; jj < bs; jj++) { // cols in block
7868                 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7869                 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7870               }
7871             }
7872             PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7873           }
7874           grow = Istart / bs + brow / bs;
7875           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES));
7876         }
7877       }
7878       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7879       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7880       PetscCall(PetscFree2(AA, AJ));
7881     } else {
7882       const PetscScalar *vals;
7883       const PetscInt    *idx;
7884       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
7885     old_bs:
7886       /*
7887        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7888        */
7889       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
7890       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7891       if (isseqaij) {
7892         PetscInt max_d_nnz;
7893         /*
7894          Determine exact preallocation count for (sequential) scalar matrix
7895          */
7896         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
7897         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7898         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7899         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7900         PetscCall(PetscFree3(w0, w1, w2));
7901       } else if (ismpiaij) {
7902         Mat             Daij, Oaij;
7903         const PetscInt *garray;
7904         PetscInt        max_d_nnz;
7905         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
7906         /*
7907          Determine exact preallocation count for diagonal block portion of scalar matrix
7908          */
7909         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
7910         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7911         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7912         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7913         PetscCall(PetscFree3(w0, w1, w2));
7914         /*
7915          Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
7916          */
7917         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7918           o_nnz[jj] = 0;
7919           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
7920             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7921             o_nnz[jj] += ncols;
7922             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7923           }
7924           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
7925         }
7926       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
7927       /* get scalar copy (norms) of matrix */
7928       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7929       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7930       PetscCall(PetscFree2(d_nnz, o_nnz));
7931       for (Ii = Istart; Ii < Iend; Ii++) {
7932         PetscInt dest_row = Ii / bs;
7933         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
7934         for (jj = 0; jj < ncols; jj++) {
7935           PetscInt    dest_col = idx[jj] / bs;
7936           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
7937           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
7938         }
7939         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
7940       }
7941       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7942       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7943     }
7944   } else {
7945     if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
7946     else {
7947       Gmat = Amat;
7948       PetscCall(PetscObjectReference((PetscObject)Gmat));
7949     }
7950     if (isseqaij) {
7951       a = Gmat;
7952       b = NULL;
7953     } else {
7954       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
7955       a             = d->A;
7956       b             = d->B;
7957     }
7958     if (filter >= 0 || scale) {
7959       /* take absolute value of each entry */
7960       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7961         MatInfo      info;
7962         PetscScalar *avals;
7963         PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
7964         PetscCall(MatSeqAIJGetArray(c, &avals));
7965         for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
7966         PetscCall(MatSeqAIJRestoreArray(c, &avals));
7967       }
7968     }
7969   }
7970   if (symmetrize) {
7971     PetscBool isset, issym;
7972     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
7973     if (!isset || !issym) {
7974       Mat matTrans;
7975       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
7976       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
7977       PetscCall(MatDestroy(&matTrans));
7978     }
7979     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
7980   } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
7981   if (scale) {
7982     /* scale c for all diagonal values = 1 or -1 */
7983     Vec diag;
7984     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
7985     PetscCall(MatGetDiagonal(Gmat, diag));
7986     PetscCall(VecReciprocal(diag));
7987     PetscCall(VecSqrtAbs(diag));
7988     PetscCall(MatDiagonalScale(Gmat, diag, diag));
7989     PetscCall(VecDestroy(&diag));
7990   }
7991   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
7992 
7993   if (filter >= 0) {
7994     PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
7995     PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
7996   }
7997   *a_Gmat = Gmat;
7998   PetscFunctionReturn(PETSC_SUCCESS);
7999 }
8000 
8001 /*
8002     Special version for direct calls from Fortran
8003 */
8004 #include <petsc/private/fortranimpl.h>
8005 
8006 /* Change these macros so can be used in void function */
8007 /* Identical to PetscCallVoid, except it assigns to *_ierr */
8008 #undef PetscCall
8009 #define PetscCall(...) \
8010   do { \
8011     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
8012     if (PetscUnlikely(ierr_msv_mpiaij)) { \
8013       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
8014       return; \
8015     } \
8016   } while (0)
8017 
8018 #undef SETERRQ
8019 #define SETERRQ(comm, ierr, ...) \
8020   do { \
8021     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
8022     return; \
8023   } while (0)
8024 
8025 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8026   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8027 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8028   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8029 #else
8030 #endif
/*
  matsetvaluesmpiaij_ - Fortran-callable fast path for MatSetValues() on MPIAIJ matrices.

  All scalar arguments arrive as pointers (Fortran pass-by-reference); errors are reported
  through *_ierr via the redefined PetscCall/SETERRQ macros above instead of a return value.
  Locally-owned entries are inserted directly into the diagonal (A) or off-diagonal (B)
  SeqAIJ parts via the MatSetValues_SeqAIJ_{A,B}_Private macros; off-process rows are stashed
  for communication at assembly time.
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat = *mmat;
  PetscInt    m = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat         A     = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    PetscBool   ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B                 = aij->B;
    Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    /* scratch variables consumed by the MatSetValues_SeqAIJ_{A,B}_Private macros */
    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue; /* negative row indices are ignored by convention */
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* locally owned row: cache search state for both the A (diag) and B (off-diag) parts */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* column lies in the diagonal block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* column lies in the off-diagonal block: map global column to local B column */
            if (mat->was_assembled) {
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
                /* new off-diagonal column: disassemble so B uses global column indices again */
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B        = aij->B;
                b        = (Mat_SeqAIJ *)B->data;
                bimax    = b->imax;
                bi       = b->i;
                bilen    = b->ilen;
                bj       = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* off-process row: stash the values to be communicated during assembly */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}
8144 
8145 /* Undefining these here since they were redefined from their original definition above! No
8146  * other PETSc functions should be defined past this point, as it is impossible to recover the
8147  * original definitions */
8148 #undef PetscCall
8149 #undef SETERRQ
8150